diff --git a/apps/llm/ios/Podfile.lock b/apps/llm/ios/Podfile.lock index 3c226c3076..16a673f2ad 100644 --- a/apps/llm/ios/Podfile.lock +++ b/apps/llm/ios/Podfile.lock @@ -2153,9 +2153,9 @@ PODS: - ReactCommon/turbomodule/core - Yoga - SocketRocket (0.7.1) - - sqlite3 (3.50.4): - - sqlite3/common (= 3.50.4) - - sqlite3/common (3.50.4) + - sqlite3 (3.50.1): + - sqlite3/common (= 3.50.1) + - sqlite3/common (3.50.1) - Yoga (0.0.0) DEPENDENCIES: @@ -2444,97 +2444,97 @@ EXTERNAL SOURCES: SPEC CHECKSUMS: boost: 7e761d76ca2ce687f7cc98e698152abd03a18f90 DoubleConversion: cb417026b2400c8f53ae97020b2be961b59470cb - EXConstants: 9f310f44bfedba09087042756802040e464323c0 - Expo: 4e8bda07d30b024b1732f87843a5349a3ecc1316 - ExpoAsset: 3bc9adb7dbbf27ae82c18ca97eb988a3ae7e73b1 - ExpoBrightness: c335c6ccc082d5249a4b38dba5cd9a08aa0bf62b - ExpoCalendar: f5f94ea8dcd957b1434beb4e1c0da1af063322e6 - ExpoFileSystem: c36eb8155eb2381c83dda7dc210e3eec332368b6 - ExpoFont: abbb91a911eb961652c2b0a22eef801860425ed6 - ExpoHead: af044f3e9c99e7d8d21bf653b4c2f2ef53a7f082 - ExpoKeepAwake: bf0811570c8da182bfb879169437d4de298376e7 - ExpoLinking: b85ff4eafeae6fc638c6cace60007ae521af0ef4 - ExpoModulesCore: d431ffe83c8673d02cb38425594a5f5480fd3061 + EXConstants: be238322d57d084dc055dbd5d6fe6479510504ce + Expo: 77b39f42396989cbe6fbef9f6fafc9b35186a95b + ExpoAsset: 3ea3275cca6a7793b3d36fbf1075c590f803fbcb + ExpoBrightness: 05e750736f8886dcf235212b0caf85b0f605fc88 + ExpoCalendar: 660542dc1c5ef98f46bedcc8745aa707df5d501a + ExpoFileSystem: 3a98ca2a6f13674ecfd97327d1b44a8ace444cbd + ExpoFont: 312c73403bbd4f98e1d6a5330641a56292583cd2 + ExpoHead: 5df88545652c2d3a3ea50bcd7f6be6ca935ac997 + ExpoKeepAwake: e8dedc115d9f6f24b153ccd2d1d8efcdfd68a527 + ExpoLinking: 5d151d4a497d7e375308602f0a89b4e8acf7b5f8 + ExpoModulesCore: e2e363bcdee87b46f858586d1887ebb215582001 fast_float: 06eeec4fe712a76acc9376682e4808b05ce978b6 FBLazyVector: 84b955f7b4da8b895faf5946f73748267347c975 fmt: a40bb5bd0294ea969aaaba240a927bd33d878cdd glog: 
5683914934d5b6e4240e497e0f4a3b42d1854183 hermes-engine: 314be5250afa5692b57b4dd1705959e1973a8ebe opencv-rne: 2305807573b6e29c8c87e3416ab096d09047a7a0 - RCT-Folly: e78785aa9ba2ed998ea4151e314036f6c49e6d82 + RCT-Folly: 36fe2295e44b10d831836cc0d1daec5f8abcf809 RCTDeprecation: 83ffb90c23ee5cea353bd32008a7bca100908f8c RCTRequired: eb7c0aba998009f47a540bec9e9d69a54f68136e RCTTypeSafety: 659ae318c09de0477fd27bbc9e140071c7ea5c93 React: c2d3aa44c49bb34e4dfd49d3ee92da5ebacc1c1c React-callinvoker: 1bdfb7549b5af266d85757193b5069f60659ef9d - React-Core: 10597593fdbae06f0089881e025a172e51d4a769 - React-CoreModules: 6907b255529dd46895cf687daa67b24484a612c2 - React-cxxreact: a9f5b8180d6955bc3f6a3fcd657c4d9b4d95c1f6 + React-Core: 7150cf9b6a5af063b37003062689f1691e79c020 + React-CoreModules: 15a85e6665d61678942da6ae485b351f4c699049 + React-cxxreact: 74f9de59259ac951923f5726aa14f0398f167af9 React-debug: e74e76912b91e08d580c481c34881899ccf63da9 - React-defaultsnativemodule: 11f6ee2cf69bf3af9d0f28a6253def33d21b5266 - React-domnativemodule: f940bbc4fa9e134190acbf3a4a9f95621b5a8f51 - React-Fabric: 6f5c357bf3a42ff11f8844ad3fc7a1eb04f4b9de - React-FabricComponents: 10e0c0209822ac9e69412913a8af1ca33573379b - React-FabricImage: f582e764072dfa4715ae8c42979a5bace9cbcc12 + React-defaultsnativemodule: 628285212bbd65417d40ad6a9f8781830fda6c98 + React-domnativemodule: 185d9808198405c176784aaf33403d713bd24fb7 + React-Fabric: c814804affbe1952e16149ddd20256e1bccae67e + React-FabricComponents: 81ef47d596966121784afec9924f9562a29b1691 + React-FabricImage: f14f371d678aa557101def954ac3ba27e48948ff React-featureflags: d5facceff8f8f6de430e0acecf4979a9a0839ba9 - React-featureflagsnativemodule: a7dd141f1ef4b7c1331af0035689fbc742a49ff4 - React-graphics: 36ae3407172c1c77cea29265d2b12b90aaef6aa0 - React-hermes: 9116d4e6d07abeb519a2852672de087f44da8f12 - React-idlecallbacksnativemodule: ae7f5ffc6cf2d2058b007b78248e5b08172ad5c3 - React-ImageManager: 9daee0dc99ad6a001d4b9e691fbf37107e2b7b54 - React-jserrorhandler: 
1e6211581071edaf4ecd5303147328120c73f4dc - React-jsi: 753ba30c902f3a41fa7f956aca8eea3317a44ee6 - React-jsiexecutor: 47520714aa7d9589c51c0f3713dfbfca4895d4f9 - React-jsinspector: cfd27107f6d6f1076a57d88c932401251560fe5f - React-jsinspectortracing: 76a7d791f3c0c09a0d2bf6f46dfb0e79a4fcc0ac - React-jsitooling: 995e826570dd58f802251490486ebd3244a037ab - React-jsitracing: 094ae3d8c123cea67b50211c945b7c0443d3e97b - React-logger: 8edfcedc100544791cd82692ca5a574240a16219 - React-Mapbuffer: c3f4b608e4a59dd2f6a416ef4d47a14400194468 - React-microtasksnativemodule: 054f34e9b82f02bd40f09cebd4083828b5b2beb6 - react-native-executorch: 88c3786c6346d5fbd62417b5c799e818568e6cc5 - react-native-safe-area-context: 562163222d999b79a51577eda2ea8ad2c32b4d06 - React-NativeModulesApple: 2c4377e139522c3d73f5df582e4f051a838ff25e + React-featureflagsnativemodule: 96f0ab285382d95c90f663e02526a5ceefa95a11 + React-graphics: 1a66ee0a3f093b125b853f6370296fadcaf6f233 + React-hermes: 8b86e5f54a65ecb69cdf22b3a00a11562eda82d2 + React-idlecallbacksnativemodule: 5c25ab145c602264d00cb26a397ab52e0efa031c + React-ImageManager: 15e34bd5ef1ac4a18e96660817ef70a7f99ee8c2 + React-jserrorhandler: 02cdf2cd45350108be1ffd2b164578936dbbdff7 + React-jsi: 6af1987cfbb1b6621664fdbf6c7b62bd4d38c923 + React-jsiexecutor: 51f372998e0303585cb0317232b938d694663cbd + React-jsinspector: 3539ad976d073bfaa8a7d2fa9bef35e70e55033e + React-jsinspectortracing: e8dbacaf67c201f23052ca1c2bae2f7b84dec443 + React-jsitooling: 95a34f41e3c249d42181de13b4f8d854f178ca9f + React-jsitracing: 25b029cf5cad488252d46da19dd8c4c134fd5fe4 + React-logger: 368570a253f00879a1e4fea24ed4047e72e7bbf3 + React-Mapbuffer: c04fcda1c6281fc0a6824c7dcc1633dd217ac1ec + React-microtasksnativemodule: ca2804a25fdcefffa0aa942aa23ab53b99614a34 + react-native-executorch: 66ffc33df70ec85bc591f9ee34c862835966ead3 + react-native-safe-area-context: 00d03dc688ba86664be66f9e3f203fc7d747d899 + React-NativeModulesApple: 452b86b29fae99ed0a4015dca3ad9cd222f88abf React-oscompat: 
ef5df1c734f19b8003e149317d041b8ce1f7d29c - React-perflogger: 9a151e0b4c933c9205fd648c246506a83f31395d - React-performancetimeline: 5b0dfc0acba29ea0269ddb34cd6dd59d3b8a1c66 + React-perflogger: 6fd2f6811533e9c19a61e855c3033eecbf4ad2a0 + React-performancetimeline: abf31259d794c9274b3ea19c5016186925eec6c4 React-RCTActionSheet: a499b0d6d9793886b67ba3e16046a3fef2cdbbc3 - React-RCTAnimation: cc64adc259aabc3354b73065e2231d796dfce576 - React-RCTAppDelegate: 9d523da768f1c9e84c5f3b7e3624d097dfb0e16b - React-RCTBlob: e727f53eeefded7e6432eb76bd22b57bc880e5d1 - React-RCTFabric: 58590aa4fdb4ad546c06a7449b486cf6844e991f - React-RCTFBReactNativeSpec: 9064c63d99e467a3893e328ba3612745c3c3a338 - React-RCTImage: 7159cbdbb18a09d97ba1a611416eced75b3ccb29 - React-RCTLinking: 46293afdb859bccc63e1d3dedc6901a3c04ef360 - React-RCTNetwork: 4a6cd18f5bcd0363657789c64043123a896b1170 - React-RCTRuntime: 5ab904fd749aa52f267ef771d265612582a17880 - React-RCTSettings: 61e361dc85136d1cb0e148b7541993d2ee950ea7 - React-RCTText: abd1e196c3167175e6baef18199c6d9d8ac54b4e - React-RCTVibration: 490e0dcb01a3fe4a0dfb7bc51ad5856d8b84f343 + React-RCTAnimation: 2595dcb10a82216a511b54742f8c28d793852ac6 + React-RCTAppDelegate: f03604b70f57c9469a84a159d8abecf793a5bcff + React-RCTBlob: e00f9b4e2f151938f4d9864cf33ebf24ac03328a + React-RCTFabric: 3945d116fd271598db262d4e6ed5691d431ed9e8 + React-RCTFBReactNativeSpec: 0f4d4f0da938101f2ca9d5333a8f46e527ad2819 + React-RCTImage: dac5e9f8ec476aefe6e60ee640ebc1dfaf1a4dbe + React-RCTLinking: 494b785a40d952a1dfbe712f43214376e5f0e408 + React-RCTNetwork: b3d7c30cd21793e268db107dd0980cb61b3c1c44 + React-RCTRuntime: a8ff419d437228e7b8a793b14f9d711e1cbb82af + React-RCTSettings: a060c7e381a3896104761b8eed7e284d95e37df3 + React-RCTText: 4f272b72dbb61f390d8c8274528f9fdbff983806 + React-RCTVibration: 0e5326220719aca12473d703aa46693e3b4ce67a React-rendererconsistency: 351fdbc5c1fe4da24243d939094a80f0e149c7a1 - React-renderercss: 3438814bee838ae7840a633ab085ac81699fd5cf - 
React-rendererdebug: 0ac2b9419ad6f88444f066d4b476180af311fb1e + React-renderercss: d333f2ada83969591100d91ec6b23ca2e17e1507 + React-rendererdebug: 039e5949b72ba63c703de020701e3fd152434c61 React-rncore: 57ed480649bb678d8bdc386d20fee8bf2b0c307c - React-RuntimeApple: 8b7a9788f31548298ba1990620fe06b40de65ad7 - React-RuntimeCore: e03d96fbd57ce69fd9bca8c925942194a5126dbc + React-RuntimeApple: 344a5e1105256000afabaa8df12c3e4cab880340 + React-RuntimeCore: 0e48fb5e5160acc0334c7a723a42d42cef4b58b6 React-runtimeexecutor: d60846710facedd1edb70c08b738119b3ee2c6c2 - React-RuntimeHermes: aab794755d9f6efd249b61f3af4417296904e3ba - React-runtimescheduler: c3cd124fa5db7c37f601ee49ca0d97019acd8788 + React-RuntimeHermes: 064286a03871d932c99738e0f8ef854962ab4b99 + React-runtimescheduler: e917ab17ae08c204af1ebf8f669b7e411b0220c8 React-timing: a90f4654cbda9c628614f9bee68967f1768bd6a5 - React-utils: a612d50555b6f0f90c74b7d79954019ad47f5de6 - ReactAppDependencyProvider: 04d5eb15eb46be6720e17a4a7fa92940a776e584 - ReactCodegen: 7ea266ccd94436294f516247db7402b57b1214af - ReactCommon: 76d2dc87136d0a667678668b86f0fca0c16fdeb0 - RNAudioAPI: 2e3fd4bf75aa5717791babb30126707504996f09 - RNDeviceInfo: d863506092aef7e7af3a1c350c913d867d795047 - RNGestureHandler: 7d0931a61d7ba0259f32db0ba7d0963c3ed15d2b - RNLiveAudioStream: 93ac2bb6065be9018d0b00157b220f11cebc1513 - RNReanimated: afd6a269a47d6f13ba295c46c6c0e14e3cbd0d8a - RNScreens: 482e9707f9826230810c92e765751af53826d509 - RNSVG: 794f269526df9ddc1f79b3d1a202b619df0368e3 + React-utils: 51c4e71608b8133fecc9a15801d244ae7bdf3758 + ReactAppDependencyProvider: d5dcc564f129632276bd3184e60f053fcd574d6b + ReactCodegen: c9a256facbe4996140f3fb95c7f03ba61c12acc9 + ReactCommon: 4d0da92a5eb8da86c08e3ec34bd23ab439fb2461 + RNAudioAPI: f93e51adeee0911c8c6629a56f6df35edc60c084 + RNDeviceInfo: feea80a690d2bde1fe51461cf548039258bd03f2 + RNGestureHandler: ccf4105b125002bd88e39d2a1f2b7e6001bcdf34 + RNLiveAudioStream: 02584d52711b6b9f268cb371a4b1bdd76ab3e079 + 
RNReanimated: c567de23384730756bb19ff55490819980536b09 + RNScreens: c2e3cc506212228c607b4785b315205e28acbf0f + RNSVG: ee32efbed652c5151fd3f98bed13c68af285bc38 SocketRocket: d4aabe649be1e368d1318fdf28a022d714d65748 - sqlite3: 73513155ec6979715d3904ef53a8d68892d4032b - Yoga: c758bfb934100bb4bf9cbaccb52557cee35e8bdf + sqlite3: 1d85290c3321153511f6e900ede7a1608718bbd5 + Yoga: 9f110fc4b7aa538663cba3c14cbb1c335f43c13f PODFILE CHECKSUM: bba19a069e673f2259009e9d2caab44374fdebcf diff --git a/apps/llm/ios/llm.xcodeproj/project.pbxproj b/apps/llm/ios/llm.xcodeproj/project.pbxproj index 4b52e71d52..c86af0f8ab 100644 --- a/apps/llm/ios/llm.xcodeproj/project.pbxproj +++ b/apps/llm/ios/llm.xcodeproj/project.pbxproj @@ -26,14 +26,14 @@ 63C842393C3838DA2ECEFC7C /* Pods-llm.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-llm.release.xcconfig"; path = "Target Support Files/Pods-llm/Pods-llm.release.xcconfig"; sourceTree = ""; }; 8CD8BF58A368F789F1E7DF50 /* ExpoModulesProvider.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; name = ExpoModulesProvider.swift; path = "Pods/Target Support Files/Pods-llm/ExpoModulesProvider.swift"; sourceTree = ""; }; AA286B85B6C04FC6940260E9 /* SplashScreen.storyboard */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.storyboard; name = SplashScreen.storyboard; path = llm/SplashScreen.storyboard; sourceTree = ""; }; - B79E360E00239D910BF9B38D /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xml; name = PrivacyInfo.xcprivacy; path = llm/PrivacyInfo.xcprivacy; sourceTree = ""; }; + B79E360E00239D910BF9B38D /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; includeInIndex = 1; name = PrivacyInfo.xcprivacy; path = llm/PrivacyInfo.xcprivacy; sourceTree = ""; }; BB2F792C24A3F905000567C9 /* Expo.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
text.plist.xml; path = Expo.plist; sourceTree = ""; }; - E8C01EF33FCE4105BBBC9DF6 /* Aeonik-Medium.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = 9; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Medium.otf"; path = "../assets/fonts/Aeonik-Medium.otf"; sourceTree = ""; }; + E8C01EF33FCE4105BBBC9DF6 /* Aeonik-Medium.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = undefined; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Medium.otf"; path = "../assets/fonts/Aeonik-Medium.otf"; sourceTree = ""; }; ED297162215061F000B7C4FE /* JavaScriptCore.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = JavaScriptCore.framework; path = System/Library/Frameworks/JavaScriptCore.framework; sourceTree = SDKROOT; }; F11748412D0307B40044C1D9 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = AppDelegate.swift; path = llm/AppDelegate.swift; sourceTree = ""; }; F11748442D0722820044C1D9 /* llm-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "llm-Bridging-Header.h"; path = "llm/llm-Bridging-Header.h"; sourceTree = ""; }; F5CE0775ADE5923FA417B603 /* libPods-llm.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libPods-llm.a"; sourceTree = BUILT_PRODUCTS_DIR; }; - F866B7979FB94C8797EE2E3D /* Aeonik-Regular.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = 9; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Regular.otf"; path = "../assets/fonts/Aeonik-Regular.otf"; sourceTree = ""; }; + F866B7979FB94C8797EE2E3D /* Aeonik-Regular.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = undefined; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Regular.otf"; path = "../assets/fonts/Aeonik-Regular.otf"; sourceTree = ""; }; /* End PBXFileReference section */ /* 
Begin PBXFrameworksBuildPhase section */ @@ -93,6 +93,7 @@ 4F489A14802F01369BFDDEFD /* Pods-llm.debug.xcconfig */, 63C842393C3838DA2ECEFC7C /* Pods-llm.release.xcconfig */, ); + name = Pods; path = Pods; sourceTree = ""; }; @@ -134,6 +135,7 @@ E8C01EF33FCE4105BBBC9DF6 /* Aeonik-Medium.otf */, ); name = Resources; + path = ""; sourceTree = ""; }; BB2F792B24A3F905000567C9 /* Supporting */ = { @@ -353,7 +355,6 @@ CLANG_ENABLE_MODULES = YES; CODE_SIGN_ENTITLEMENTS = llm/llm.entitlements; CURRENT_PROJECT_VERSION = 1; - DEVELOPMENT_TEAM = ""; ENABLE_BITCODE = NO; GCC_PREPROCESSOR_DEFINITIONS = ( "$(inherited)", @@ -390,7 +391,6 @@ CLANG_ENABLE_MODULES = YES; CODE_SIGN_ENTITLEMENTS = llm/llm.entitlements; CURRENT_PROJECT_VERSION = 1; - DEVELOPMENT_TEAM = ""; INFOPLIST_FILE = llm/Info.plist; IPHONEOS_DEPLOYMENT_TARGET = 15.1; LD_RUNPATH_SEARCH_PATHS = ( @@ -468,7 +468,10 @@ LIBRARY_SEARCH_PATHS = "$(SDKROOT)/usr/lib/swift\"$(inherited)\""; MTL_ENABLE_DEBUG_INFO = YES; ONLY_ACTIVE_ARCH = YES; - OTHER_LDFLAGS = "$(inherited) "; + OTHER_LDFLAGS = ( + "$(inherited)", + " ", + ); REACT_NATIVE_PATH = "${PODS_ROOT}/../../../../node_modules/react-native"; SDKROOT = iphoneos; SWIFT_ACTIVE_COMPILATION_CONDITIONS = "$(inherited) DEBUG"; @@ -523,7 +526,10 @@ ); LIBRARY_SEARCH_PATHS = "$(SDKROOT)/usr/lib/swift\"$(inherited)\""; MTL_ENABLE_DEBUG_INFO = NO; - OTHER_LDFLAGS = "$(inherited) "; + OTHER_LDFLAGS = ( + "$(inherited)", + " ", + ); REACT_NATIVE_PATH = "${PODS_ROOT}/../../../../node_modules/react-native"; SDKROOT = iphoneos; USE_HERMES = true; diff --git a/apps/llm/ios/llm/llm.entitlements b/apps/llm/ios/llm/llm.entitlements index 0c67376eba..8f5046f7d4 100644 --- a/apps/llm/ios/llm/llm.entitlements +++ b/apps/llm/ios/llm/llm.entitlements @@ -1,5 +1,10 @@ - + + com.apple.developer.kernel.increased-debugging-memory-limit + + com.apple.developer.kernel.increased-memory-limit + + diff --git a/packages/react-native-executorch/android/CMakeLists.txt 
b/packages/react-native-executorch/android/CMakeLists.txt index 96164c49c6..d35311d7a4 100644 --- a/packages/react-native-executorch/android/CMakeLists.txt +++ b/packages/react-native-executorch/android/CMakeLists.txt @@ -14,4 +14,28 @@ set(COMMON_CPP_DIR "${CMAKE_SOURCE_DIR}/../common") set(LIBS_DIR "${CMAKE_SOURCE_DIR}/../third-party/android/libs") set(INCLUDE_DIR "${CMAKE_SOURCE_DIR}/../third-party/include") +# FIXME: Below you can see our (so far unsuccessful) attempts at linking tokenizers-cpp +# directly into react-native-executorch instead of it being linked against ExecuTorch +# and then transitively to our library. Please go back to this when we bump ET runtime to the next version. +# The problem with directly linking tokenizers-cpp using a submodule is that we get unresolved symbols for +# some android logging libraries, which are referenced by sentencepiece. + +# set(TOKENIZERS_CPP_DIR "${CMAKE_SOURCE_DIR}/../../../third-party/tokenizers-cpp") +# add_subdirectory("${TOKENIZERS_CPP_DIR}" tokenizers-cpp) + +# # Link Android log library to sentencepiece targets +# if(TARGET sentencepiece-static) +# target_link_libraries(sentencepiece-static INTERFACE log) +# endif() +# if(TARGET sentencepiece_train-static) +# target_link_libraries(sentencepiece_train-static INTERFACE log) +# endif() + +# # Link log library to sentencepiece executables +# foreach(exe spm_encode spm_decode spm_normalize spm_train spm_export_vocab) +# if(TARGET ${exe}) +# target_link_libraries(${exe} log) +# endif() +# endforeach() + add_subdirectory("${ANDROID_CPP_DIR}") \ No newline at end of file diff --git a/packages/react-native-executorch/android/build.gradle b/packages/react-native-executorch/android/build.gradle index 0e54c69ec0..10fd2323f1 100644 --- a/packages/react-native-executorch/android/build.gradle +++ b/packages/react-native-executorch/android/build.gradle @@ -168,5 +168,6 @@ dependencies { implementation 'com.facebook.fbjni:fbjni:0.6.0' implementation 
"org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version" implementation files('libs/classes.jar') + implementation 'org.opencv:opencv:4.10.0' implementation("com.squareup.okhttp3:okhttp:4.9.2") } diff --git a/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt b/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt index 11b30acdc6..bf1544aeb4 100644 --- a/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt +++ b/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt @@ -33,6 +33,7 @@ set(RN_VERSION_LINK_LIBRARIES ) # Dependencies: + # ------- Executorch ------- add_library(executorch SHARED IMPORTED) @@ -40,6 +41,29 @@ add_library(executorch SHARED IMPORTED) set_target_properties(executorch PROPERTIES IMPORTED_LOCATION "${LIBS_DIR}/executorch/${ANDROID_ABI}/libexecutorch.so") + +if(ANDROID_ABI STREQUAL "arm64-v8a") + target_compile_definitions(react-native-executorch PRIVATE ARCH_ARM64) + + # ------- pthreadpool ------- + add_library(pthreadpool SHARED IMPORTED) + + set_target_properties(pthreadpool PROPERTIES + IMPORTED_LOCATION "${LIBS_DIR}/pthreadpool/${ANDROID_ABI}/libpthreadpool.so" + INTERFACE_INCLUDE_DIRECTORIES "${LIBS_DIR}/../../include/pthreadpool/") + + # ------- cpuinfo ------- + add_library(cpuinfo SHARED IMPORTED) + + set_target_properties(cpuinfo PROPERTIES + IMPORTED_LOCATION "${LIBS_DIR}/cpuinfo/${ANDROID_ABI}/libcpuinfo.so" + INTERFACE_INCLUDE_DIRECTORIES "${LIBS_DIR}/../../include/cpuinfo/") + set(EXECUTORCH_LIBS + "pthreadpool" + "cpuinfo" + ) +endif() + # ------- OpenCV ------- set(OPENCV_LIBS @@ -70,4 +94,5 @@ target_link_libraries( ${OPENCV_LIBS} ${OPENCV_THIRD_PARTY_LIBS} executorch + ${EXECUTORCH_LIBS} ) \ No newline at end of file diff --git a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt b/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt deleted file mode 100644 index 04205ddcca..0000000000 --- 
a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +++ /dev/null @@ -1,63 +0,0 @@ -package com.swmansion.rnexecutorch - -import android.util.Log -import com.facebook.react.bridge.Promise -import com.facebook.react.bridge.ReactApplicationContext -import org.pytorch.executorch.extension.llm.LlmCallback -import org.pytorch.executorch.extension.llm.LlmModule - -class LLM( - reactContext: ReactApplicationContext, -) : NativeLLMSpec(reactContext), - LlmCallback { - private var llmModule: LlmModule? = null - - override fun getName(): String = NAME - - override fun initialize() { - super.initialize() - } - - override fun onResult(result: String) { - emitOnToken(result) - } - - override fun onStats(tps: Float) { - Log.d("rn_executorch", "TPS: $tps") - } - - override fun loadLLM( - modelSource: String, - tokenizerSource: String, - promise: Promise, - ) { - try { - llmModule = LlmModule(modelSource, tokenizerSource, 0.7f) - promise.resolve("Model loaded successfully") - } catch (e: Exception) { - promise.reject("Model loading failed", e.message) - } - } - - override fun forward( - input: String, - promise: Promise, - ) { - Thread { - llmModule!!.generate(input, this) - promise.resolve("Inference completed successfully") - }.start() - } - - override fun interrupt() { - llmModule!!.stop() - } - - override fun releaseResources() { - llmModule = null - } - - companion object { - const val NAME = "LLM" - } -} diff --git a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt b/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt index 98a1fa1d38..0b15e216a5 100644 --- a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +++ b/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt @@ -14,9 +14,7 @@ class RnExecutorchPackage : 
TurboReactPackage() { name: String, reactContext: ReactApplicationContext, ): NativeModule? = - if (name == LLM.NAME) { - LLM(reactContext) - } else if (name == ETInstaller.NAME) { + if (name == ETInstaller.NAME) { ETInstaller(reactContext) } else { null @@ -25,16 +23,6 @@ class RnExecutorchPackage : TurboReactPackage() { override fun getReactModuleInfoProvider(): ReactModuleInfoProvider = ReactModuleInfoProvider { val moduleInfos: MutableMap = HashMap() - moduleInfos[LLM.NAME] = - ReactModuleInfo( - LLM.NAME, - LLM.NAME, - false, // canOverrideExistingModule - false, // needsEagerInit - true, // hasConstants - false, // isCxxModule - true, - ) moduleInfos[ETInstaller.NAME] = ReactModuleInfo( ETInstaller.NAME, diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp index 31b4691cc8..0ac90972df 100644 --- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp @@ -6,12 +6,20 @@ #include #include #include +#include #include #include #include #include #include +#if defined(__ANDROID__) && defined(__aarch64__) +#include +#include +#include +#include +#endif + namespace rnexecutorch { // This function fetches data from a url address. 
It is implemented in @@ -58,14 +66,15 @@ void RnExecutorchInstaller::injectJSIBindings( *jsiRuntime, "loadImageEmbeddings", RnExecutorchInstaller::loadModel( jsiRuntime, jsCallInvoker, "loadImageEmbeddings")); + jsiRuntime->global().setProperty( *jsiRuntime, "loadTextEmbeddings", RnExecutorchInstaller::loadModel( jsiRuntime, jsCallInvoker, "loadTextEmbeddings")); - jsiRuntime->global().setProperty( - *jsiRuntime, "loadSpeechToText", - RnExecutorchInstaller::loadModel(jsiRuntime, jsCallInvoker, - "loadSpeechToText")); + + jsiRuntime->global().setProperty(*jsiRuntime, "loadLLM", + RnExecutorchInstaller::loadModel( + jsiRuntime, jsCallInvoker, "loadLLM")); jsiRuntime->global().setProperty(*jsiRuntime, "loadOCR", RnExecutorchInstaller::loadModel( @@ -74,5 +83,29 @@ *jsiRuntime, "loadVerticalOCR", RnExecutorchInstaller::loadModel(jsiRuntime, jsCallInvoker, "loadVerticalOCR")); + + jsiRuntime->global().setProperty( + *jsiRuntime, "loadSpeechToText", + RnExecutorchInstaller::loadModel(jsiRuntime, jsCallInvoker, + "loadSpeechToText")); + +#if defined(__ANDROID__) && defined(__aarch64__) + auto num_of_perf_cores = + ::executorch::extension::cpuinfo::get_num_performant_cores(); + log(LOG_LEVEL::Info, + std::format("Detected {} performant cores", num_of_perf_cores)); + // setting num_of_cores to floor(num_of_perf_cores / 2) + 1 because, depending + // on cpu arch, we want to leave at least 2 performant cores for other tasks + // when possible (setting more actually results in a drop in performance). For + // older devices (e.g. Samsung S22) this resolves to 3 cores, and for newer + // ones (like OnePlus 12) it resolves to 4, which when benchmarked gives the + // highest throughput. 
+ auto num_of_cores = static_cast(num_of_perf_cores / 2) + 1; + ::executorch::extension::threadpool::get_threadpool() + ->_unsafe_reset_threadpool(num_of_cores); + log(LOG_LEVEL::Info, + std::format("Configuring xnnpack for {} threads", num_of_cores)); +#endif } + } // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h index a4b923003f..cd24787e3f 100644 --- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h +++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h @@ -34,6 +34,8 @@ REGISTER_CONSTRUCTOR(ImageEmbeddings, std::string, std::shared_ptr); REGISTER_CONSTRUCTOR(TextEmbeddings, std::string, std::string, std::shared_ptr); +REGISTER_CONSTRUCTOR(LLM, std::string, std::string, + std::shared_ptr); REGISTER_CONSTRUCTOR(SpeechToText, std::string, std::string, std::string, std::shared_ptr); REGISTER_CONSTRUCTOR(OCR, std::string, std::string, std::string, std::string, diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 029ccef47d..4fd3da5e69 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -45,6 +45,14 @@ inline std::string getValue(const jsi::Value &val, return val.getString(runtime).utf8(runtime); } +template <> +inline std::shared_ptr +getValue>(const jsi::Value &val, + jsi::Runtime &runtime) { + return std::make_shared( + val.asObject(runtime).asFunction(runtime)); +} + template <> inline std::vector getValue>(const jsi::Value &val, jsi::Runtime &runtime) { diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h 
b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index 15d8698cd3..eb4e426149 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -1,6 +1,11 @@ #pragma once #include +#include +#include +#include +#include + #include #include #include @@ -10,12 +15,9 @@ #include #include #include +#include #include #include -#include -#include -#include -#include namespace rnexecutorch { @@ -77,6 +79,20 @@ template class ModelHostObject : public JsiHostObject { promiseHostFunction<&Model::tokenToId>, "tokenToId")); } + + if constexpr (meta::SameAs) { + addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, + promiseHostFunction<&Model::generate>, + "generate")); + + addFunctions(JSI_EXPORT_FUNCTION( + ModelHostObject, synchronousHostFunction<&Model::interrupt>, + "interrupt")); + + addFunctions( + JSI_EXPORT_FUNCTION(ModelHostObject, unload, "unload")); + } + if constexpr (meta::SameAs) { addFunctions( JSI_EXPORT_FUNCTION(ModelHostObject, unload, "unload")); @@ -129,6 +145,9 @@ template class ModelHostObject : public JsiHostObject { } } + // A generic host function that resolves a promise with a result of a + // function. JSI arguments are converted to the types provided in the function + // signature, and the return value is converted back to JSI before resolving. 
template JSI_HOST_FUNCTION(promiseHostFunction) { auto promise = Promise::createPromise( runtime, callInvoker, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp new file mode 100644 index 0000000000..7767719e1f --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp @@ -0,0 +1,58 @@ +#include "LLM.h" + +#include +#include + +namespace rnexecutorch { +using namespace facebook; +using executorch::extension::TensorPtr; +using executorch::runtime::Error; + +LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource, + std::shared_ptr callInvoker) + : runner(std::make_unique(modelSource, tokenizerSource)), + callInvoker(callInvoker) { + + auto loadResult = runner->load(); + if (loadResult != Error::Ok) { + throw std::runtime_error("Failed to load LLM runner, error code: " + + std::to_string(static_cast(loadResult))); + } + memorySizeLowerBound = + std::filesystem::file_size(std::filesystem::path(modelSource)) + + std::filesystem::file_size(std::filesystem::path(tokenizerSource)); +} + +void LLM::generate(std::string input, std::shared_ptr callback) { + if (!runner || !runner->is_loaded()) { + throw std::runtime_error("Runner is not loaded"); + } + + // Create a native callback that will invoke the JS callback on the JS thread + auto nativeCallback = [this, callback](const std::string &token) { + callInvoker->invokeAsync([callback, token](jsi::Runtime &runtime) { + callback->call(runtime, jsi::String::createFromUtf8(runtime, token)); + }); + }; + + auto error = runner->generate(input, nativeCallback, {}, false); + if (error != executorch::runtime::Error::Ok) { + throw std::runtime_error("Failed to generate text, error code: " + + std::to_string(static_cast(error))); + } +} + +void LLM::interrupt() { + if (!runner || !runner->is_loaded()) { + throw std::runtime_error("Can't interrupt a model that's not loaded!"); 
+ } + runner->stop(); +} + +std::size_t LLM::getMemoryLowerBound() const noexcept { + return memorySizeLowerBound; +} + +void LLM::unload() noexcept { runner.reset(nullptr); } + +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h new file mode 100644 index 0000000000..f946409d95 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +#include +#include +#include + +namespace rnexecutorch { +using namespace facebook; + +class LLM { +public: + explicit LLM(const std::string &modelSource, + const std::string &tokenizerSource, + std::shared_ptr callInvoker); + + void generate(std::string input, std::shared_ptr callback); + void interrupt(); + void unload() noexcept; + std::size_t getMemoryLowerBound() const noexcept; + +private: + size_t memorySizeLowerBound; + std::unique_ptr runner; + std::shared_ptr callInvoker; +}; +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/irunner.h b/packages/react-native-executorch/common/runner/irunner.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/irunner.h rename to packages/react-native-executorch/common/runner/irunner.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.cpp b/packages/react-native-executorch/common/runner/runner.cpp similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.cpp rename to packages/react-native-executorch/common/runner/runner.cpp diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.h b/packages/react-native-executorch/common/runner/runner.h similarity index 100% rename 
from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.h rename to packages/react-native-executorch/common/runner/runner.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp b/packages/react-native-executorch/common/runner/sampler.cpp similarity index 97% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp rename to packages/react-native-executorch/common/runner/sampler.cpp index 7ba8152889..e156b9f70e 100644 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp +++ b/packages/react-native-executorch/common/runner/sampler.cpp @@ -184,9 +184,10 @@ template int32_t Sampler::sample(T *logits) { } template int32_t Sampler::sample(float *logits); -template int32_t Sampler::sample(exec_aten::Half *logits); template int32_t -Sampler::sample(exec_aten::BFloat16 *logits); +Sampler::sample(executorch::aten::Half *logits); +template int32_t +Sampler::sample(executorch::aten::BFloat16 *logits); } // namespace llm } // namespace extension diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h b/packages/react-native-executorch/common/runner/sampler.h similarity index 91% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h rename to packages/react-native-executorch/common/runner/sampler.h index 0b29ca9fcb..03d3d09a01 100644 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h +++ b/packages/react-native-executorch/common/runner/sampler.h @@ -19,18 +19,19 @@ #endif #include +#include namespace executorch { namespace extension { namespace llm { // A simple llama2 sampler. 
-template struct ProbIndex { +template struct ET_EXPERIMENTAL ProbIndex { T prob; int32_t index; }; // struct used when sorting probabilities during top-p sampling -class Sampler { +class ET_EXPERIMENTAL Sampler { public: Sampler(int32_t vocab_size, float temperature, float topp, unsigned long long rng_seed); diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/stats.h b/packages/react-native-executorch/common/runner/stats.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/stats.h rename to packages/react-native-executorch/common/runner/stats.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.cpp b/packages/react-native-executorch/common/runner/text_decoder_runner.cpp similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.cpp rename to packages/react-native-executorch/common/runner/text_decoder_runner.cpp diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.h b/packages/react-native-executorch/common/runner/text_decoder_runner.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.h rename to packages/react-native-executorch/common/runner/text_decoder_runner.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.cpp b/packages/react-native-executorch/common/runner/text_prefiller.cpp similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.cpp rename to packages/react-native-executorch/common/runner/text_prefiller.cpp diff --git 
a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.h b/packages/react-native-executorch/common/runner/text_prefiller.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.h rename to packages/react-native-executorch/common/runner/text_prefiller.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_token_generator.h b/packages/react-native-executorch/common/runner/text_token_generator.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_token_generator.h rename to packages/react-native-executorch/common/runner/text_token_generator.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/util.h b/packages/react-native-executorch/common/runner/util.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/util.h rename to packages/react-native-executorch/common/runner/util.h diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib index cc7da01f7d..3acc9408d9 100755 Binary files a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib and b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib differ diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h 
deleted file mode 100644 index 4332cf811d..0000000000 --- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +++ /dev/null @@ -1,14 +0,0 @@ -#import - -@interface HuggingFaceTokenizer : NSObject - -- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath; -- (NSArray *)encode:(NSString *)text; -- (NSString *)decode:(NSArray *)tokenIds; -- (NSString *)decode:(NSArray *)tokenIds - skipSpecialTokens:(BOOL)skipSpecialTokens; -- (NSUInteger)getVocabSize; -- (NSString *)idToToken:(NSInteger)tokenId; -- (NSInteger)tokenToId:(NSString *)token; - -@end diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h deleted file mode 100644 index d8638cfa6a..0000000000 --- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#import - -NS_ASSUME_NONNULL_BEGIN - -FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain; - -NS_SWIFT_NAME(Runner) -@interface LLaMARunner : NSObject - -- (instancetype)initWithModelPath:(NSString *)filePath - tokenizerPath:(NSString *)tokenizerPath; -- (BOOL)isLoaded; -- (BOOL)loadWithError:(NSError **)error; -- (BOOL)generate:(NSString *)prompt - withTokenCallback:(nullable void (^)(NSString *))callback - error:(NSError **)error; -- (void)stop; - -+ (instancetype)new NS_UNAVAILABLE; -- (instancetype)init NS_UNAVAILABLE; - -@end - -NS_ASSUME_NONNULL_END diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib index 71f3ae6809..43df0d606e 100755 Binary files a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib and b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib differ diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h deleted file mode 100644 index 4332cf811d..0000000000 --- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +++ /dev/null @@ -1,14 +0,0 @@ -#import - -@interface HuggingFaceTokenizer : NSObject - -- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath; -- (NSArray *)encode:(NSString *)text; -- (NSString *)decode:(NSArray *)tokenIds; -- (NSString *)decode:(NSArray *)tokenIds - skipSpecialTokens:(BOOL)skipSpecialTokens; -- (NSUInteger)getVocabSize; -- (NSString *)idToToken:(NSInteger)tokenId; -- (NSInteger)tokenToId:(NSString 
*)token; - -@end diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h deleted file mode 100644 index d8638cfa6a..0000000000 --- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#import - -NS_ASSUME_NONNULL_BEGIN - -FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain; - -NS_SWIFT_NAME(Runner) -@interface LLaMARunner : NSObject - -- (instancetype)initWithModelPath:(NSString *)filePath - tokenizerPath:(NSString *)tokenizerPath; -- (BOOL)isLoaded; -- (BOOL)loadWithError:(NSError **)error; -- (BOOL)generate:(NSString *)prompt - withTokenCallback:(nullable void (^)(NSString *))callback - error:(NSError **)error; -- (void)stop; - -+ (instancetype)new NS_UNAVAILABLE; -- (instancetype)init NS_UNAVAILABLE; - -@end - -NS_ASSUME_NONNULL_END diff --git a/packages/react-native-executorch/ios/RnExecutorch/LLM.h b/packages/react-native-executorch/ios/RnExecutorch/LLM.h deleted file mode 100644 index 5047919a48..0000000000 --- a/packages/react-native-executorch/ios/RnExecutorch/LLM.h +++ /dev/null @@ -1,5 +0,0 @@ -#import - -@interface LLM : NativeLLMSpecBase - -@end diff --git a/packages/react-native-executorch/ios/RnExecutorch/LLM.mm b/packages/react-native-executorch/ios/RnExecutorch/LLM.mm deleted file mode 100644 index 33971f755c..0000000000 --- a/packages/react-native-executorch/ios/RnExecutorch/LLM.mm +++ /dev/null @@ -1,78 +0,0 @@ -#import "LLM.h" -#import - -@implementation LLM { - LLaMARunner *runner; -} - -- (instancetype)init { - self = 
[super init]; - - return self; -} - -RCT_EXPORT_MODULE() - -- (void)onResult:(NSString *)token prompt:(NSString *)prompt { - if ([token isEqualToString:prompt]) { - return; - } - - dispatch_async(dispatch_get_main_queue(), ^{ - [self emitOnToken:token]; - }); -} - -- (void)loadLLM:(NSString *)modelSource - tokenizerSource:(NSString *)tokenizerSource - resolve:(RCTPromiseResolveBlock)resolve - reject:(RCTPromiseRejectBlock)reject { - @try { - self->runner = [[LLaMARunner alloc] initWithModelPath:modelSource - tokenizerPath:tokenizerSource]; - - resolve(@"Model and tokenizer loaded successfully"); - return; - } @catch (NSException *exception) { - [self releaseResources]; - reject(@"Model or tokenizer loading failed", exception.reason, nil); - return; - } -} - -- (void)forward:(NSString *)input - resolve:(RCTPromiseResolveBlock)resolve - reject:(RCTPromiseRejectBlock)reject { - - dispatch_async( - dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - NSError *error = nil; - [self->runner generate:input - withTokenCallback:^(NSString *token) { - [self onResult:token prompt:input]; - } - error:&error]; - - if (error) { - reject(@"error_in_generation", error.localizedDescription, nil); - return; - } - resolve(@"Inference completed successfully"); - return; - }); -} - -- (void)interrupt { - [self->runner stop]; -} - -- (void)releaseResources { - self->runner = nil; -} - -- (std::shared_ptr)getTurboModule: - (const facebook::react::ObjCTurboModule::InitParams &)params { - return std::make_shared(params); -} - -@end diff --git a/packages/react-native-executorch/ios/libs/libbackend_coreml-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_coreml-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-ios-release.a diff --git 
a/packages/react-native-executorch/ios/libs/libbackend_coreml-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_coreml-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_coreml_ios.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_coreml_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_ios.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_coreml_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_coreml_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_mps-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_mps-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps_ios.a 
b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_mps_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps_ios.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_mps_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_xnnpack-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_xnnpack-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack_ios.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_xnnpack_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_ios.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_simulator.a similarity index 100% rename from 
packages/react-native-executorch/ios/libs/libbackend_xnnpack_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libexecutorch-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libexecutorch-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libexecutorch-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libexecutorch-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libexecutorch_ios.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libexecutorch_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch_ios.a diff --git a/packages/react-native-executorch/ios/libs/libexecutorch_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libexecutorch_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_custom-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_custom-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom-ios-release.a diff --git 
a/packages/react-native-executorch/ios/libs/libkernels_custom-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_custom-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_custom_ios.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_custom_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom_ios.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_custom_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_custom_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_optimized-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_optimized-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized_ios.a 
b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_optimized_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_ios.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_optimized_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_portable-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_portable-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable_ios.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_portable_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable_ios.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable_simulator.a similarity index 100% 
rename from packages/react-native-executorch/ios/libs/libkernels_portable_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_quantized-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_quantized-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized_ios.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_quantized_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_ios.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_quantized_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_simulator.a diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a new file mode 100644 index 0000000000..69cc738f00 Binary files 
/dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a differ diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a new file mode 100644 index 0000000000..86280b1c5c Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a differ diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a new file mode 100644 index 0000000000..6c99b48d6f Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a differ diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a new file mode 100644 index 0000000000..f3aa9203d9 Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a differ diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a new file mode 100644 index 0000000000..a24e87cedf Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a differ diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a new file mode 100644 index 0000000000..338db91c8b Binary files /dev/null and 
b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a differ diff --git a/packages/react-native-executorch/package.json b/packages/react-native-executorch/package.json index f47f62f51d..fc83fd0df4 100644 --- a/packages/react-native-executorch/package.json +++ b/packages/react-native-executorch/package.json @@ -12,7 +12,6 @@ "lib", "android", "ios", - "!ios/libs", "cpp", "common", "*.podspec", diff --git a/packages/react-native-executorch/react-native-executorch.podspec b/packages/react-native-executorch/react-native-executorch.podspec index 67b03a8552..400b0883f0 100644 --- a/packages/react-native-executorch/react-native-executorch.podspec +++ b/packages/react-native-executorch/react-native-executorch.podspec @@ -13,52 +13,56 @@ Pod::Spec.new do |s| s.platforms = { :ios => min_ios_version_supported } s.source = { :git => "https://github.com/software-mansion/react-native-executorch.git", :tag => "#{s.version}" } - et_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs', __dir__) + et_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs/executorch', __dir__) + tokenizers_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs/tokenizers-cpp', __dir__) s.user_target_xcconfig = { "HEADER_SEARCH_PATHS" => "$(PODS_TARGET_SRCROOT)/third-party/include", - # FIXME: The code below links the static libraries built from ExecuTorch against out library. - # Please uncomment it once the ExecuTorchLib is no longer required. 
- - # "OTHER_LDFLAGS[sdk=iphoneos*][arch=*]" => [ - # '$(inherited)', - # '-framework "CoreML"', - # '-framework "Accelerate"', - # '-framework "Metal"', - # '-framework "MetalPerformanceShaders"', - # '-framework "MetalPerformanceShadersGraph"', - # "-force_load \"#{et_binaries_path}\"/libbackend_coreml_ios.a", - # "-force_load \"#{et_binaries_path}\"/libbackend_mps_ios.a", - # "-force_load \"#{et_binaries_path}\"/libbackend_xnnpack_ios.a", - # "-force_load \"#{et_binaries_path}\"/libexecutorch_ios.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_custom_ios.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_optimized_ios.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_quantized_ios.a" - # ].join(' '), - - # "OTHER_LDFLAGS[sdk=iphonesimulator*][arch=*]" => [ - # '$(inherited)', - # '-framework "CoreML"', - # '-framework "Accelerate"', - # '-framework "Metal"', - # '-framework "MetalPerformanceShaders"', - # '-framework "MetalPerformanceShadersGraph"', - # "-force_load \"#{et_binaries_path}\"/libbackend_coreml_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libbackend_mps_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libbackend_xnnpack_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libexecutorch_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_custom_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_optimized_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_quantized_simulator.a" - # ].join(' '), + "OTHER_LDFLAGS[sdk=iphoneos*][arch=*]" => [ + '$(inherited)', + '-framework "CoreML"', + '-framework "Accelerate"', + '-framework "Metal"', + '-framework "MetalPerformanceShaders"', + '-framework "MetalPerformanceShadersGraph"', + "-force_load \"#{et_binaries_path}\"/libbackend_coreml_ios.a", + "-force_load \"#{et_binaries_path}\"/libbackend_mps_ios.a", + "-force_load \"#{et_binaries_path}\"/libbackend_xnnpack_ios.a", + "-force_load \"#{et_binaries_path}\"/libexecutorch_ios.a", + 
"-force_load \"#{et_binaries_path}\"/libkernels_custom_ios.a", + "-force_load \"#{et_binaries_path}\"/libkernels_optimized_ios.a", + "-force_load \"#{et_binaries_path}\"/libkernels_quantized_ios.a", + "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_cpp.a\"", + "\"#{tokenizers_binaries_path}/physical-arm64-release/libsentencepiece.a\"", + "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_c.a\"" + ].join(' '), + + "OTHER_LDFLAGS[sdk=iphonesimulator*][arch=*]" => [ + '$(inherited)', + '-framework "CoreML"', + '-framework "Accelerate"', + '-framework "Metal"', + '-framework "MetalPerformanceShaders"', + '-framework "MetalPerformanceShadersGraph"', + "-force_load \"#{et_binaries_path}\"/libbackend_coreml_simulator.a", + "-force_load \"#{et_binaries_path}\"/libbackend_mps_simulator.a", + "-force_load \"#{et_binaries_path}\"/libbackend_xnnpack_simulator.a", + "-force_load \"#{et_binaries_path}\"/libexecutorch_simulator.a", + "-force_load \"#{et_binaries_path}\"/libkernels_custom_simulator.a", + "-force_load \"#{et_binaries_path}\"/libkernels_optimized_simulator.a", + "-force_load \"#{et_binaries_path}\"/libkernels_quantized_simulator.a", + "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_cpp.a\"", + "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libsentencepiece.a\"", + "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_c.a\"" + ].join(' '), 'EXCLUDED_ARCHS[sdk=iphonesimulator*]' => 'x86_64', } s.pod_target_xcconfig = { "USE_HEADERMAP" => "YES", - "HEADER_SEARCH_PATHS" => + "HEADER_SEARCH_PATHS" => '"$(PODS_TARGET_SRCROOT)/ios" '+ '"$(PODS_TARGET_SRCROOT)/third-party/include" '+ '"$(PODS_TARGET_SRCROOT)/common" ', @@ -73,11 +77,11 @@ Pod::Spec.new do |s| ] # Exclude file with tests to not introduce gtest dependency. - # Do not include the headers from common/rnexecutorch/jsi/ as source files. 
- # Xcode/Cocoapods leaks them to other pods that an app also depends on, so if - # another pod includes a header with the same name without a path by - # #include "Header.h" we get a conflict. Here, headers in jsi/ collide with - # react-native-skia. The headers are preserved by preserve_paths and + # Do not include the headers from common/rnexecutorch/jsi/ as source files. + # Xcode/Cocoapods leaks them to other pods that an app also depends on, so if + # another pod includes a header with the same name without a path by + # #include "Header.h" we get a conflict. Here, headers in jsi/ collide with + # react-native-skia. The headers are preserved by preserve_paths and # then made available by HEADER_SEARCH_PATHS. s.exclude_files = [ "common/rnexecutorch/tests/*.{cpp}", @@ -86,9 +90,9 @@ Pod::Spec.new do |s| s.header_mappings_dir = "common/rnexecutorch" s.header_dir = "rnexecutorch" s.preserve_paths = "common/rnexecutorch/jsi/*.{h,hpp}" - + s.dependency "opencv-rne", "~> 4.11.0" s.dependency "sqlite3" install_modules_dependencies(s) -end +end \ No newline at end of file diff --git a/packages/react-native-executorch/src/controllers/LLMController.ts b/packages/react-native-executorch/src/controllers/LLMController.ts index 5bc7852350..a1d2fb1036 100644 --- a/packages/react-native-executorch/src/controllers/LLMController.ts +++ b/packages/react-native-executorch/src/controllers/LLMController.ts @@ -1,4 +1,3 @@ -import { EventSubscription } from 'react-native'; import { ResourceSource } from '../types/common'; import { ResourceFetcher } from '../utils/ResourceFetcher'; import { ETError, getError } from '../Error'; @@ -12,16 +11,15 @@ import { SPECIAL_TOKENS, ToolsConfig, } from '../types/llm'; -import { LLMNativeModule } from '../native/RnExecutorchModules'; import { parseToolCall } from '../utils/llm'; import { Logger } from '../common/Logger'; export class LLMController { - private nativeModule: typeof LLMNativeModule; + private nativeModule: any; private chatConfig: 
ChatConfig = DEFAULT_CHAT_CONFIG; private toolsConfig: ToolsConfig | undefined; private tokenizerConfig: any; - private onToken: EventSubscription | null = null; + private onToken?: (token: string) => void; private _response = ''; private _isReady = false; private _isGenerating = false; @@ -71,7 +69,6 @@ export class LLMController { this._isGenerating = isGenerating; isGeneratingCallback?.(isGenerating); }; - this.nativeModule = LLMNativeModule; } public get response() { @@ -132,10 +129,9 @@ export class LLMController { this.tokenizerConfig = JSON.parse( await readAsStringAsync('file://' + tokenizerConfigPath!) ); - - await this.nativeModule.loadLLM(modelPath, tokenizerPath); + this.nativeModule = global.loadLLM(modelPath, tokenizerPath); this.isReadyCallback(true); - this.onToken = this.nativeModule.onToken((data: string) => { + this.onToken = (data: string) => { if ( !data || (SPECIAL_TOKENS.EOS_TOKEN in this.tokenizerConfig && @@ -148,7 +144,7 @@ export class LLMController { this.tokenCallback(data); this.responseCallback(this._response + data); - }); + }; } catch (e) { this.isReadyCallback(false); throw new Error(getError(e)); @@ -182,9 +178,8 @@ export class LLMController { 'You cannot delete the model now. You need to interrupt first.' 
); } - this.onToken?.remove(); - this.onToken = null; - this.nativeModule.releaseResources(); + this.onToken = () => {}; + this.nativeModule.unload(); this.isReadyCallback(false); this.isGeneratingCallback(false); } @@ -199,7 +194,7 @@ export class LLMController { try { this.responseCallback(''); this.isGeneratingCallback(true); - await this.nativeModule.forward(input); + await this.nativeModule.generate(input, this.onToken); } catch (e) { throw new Error(getError(e)); } finally { diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts index 4a3cbe8c1e..a90d8afb77 100644 --- a/packages/react-native-executorch/src/index.ts +++ b/packages/react-native-executorch/src/index.ts @@ -10,6 +10,7 @@ declare global { var loadTokenizerModule: (source: string) => any; var loadImageEmbeddings: (source: string) => any; var loadTextEmbeddings: (modelSource: string, tokenizerSource: string) => any; + var loadLLM: (modelSource: string, tokenizerSource: string) => any; var loadSpeechToText: ( encoderSource: string, decoderSource: string, @@ -40,10 +41,10 @@ if ( global.loadTokenizerModule == null || global.loadTextEmbeddings == null || global.loadImageEmbeddings == null || + global.loadLLM == null || global.loadSpeechToText == null || global.loadOCR == null || - global.loadVerticalOCR == null || - global.loadImageEmbeddings == null + global.loadVerticalOCR == null ) { if (!ETInstallerNativeModule) { throw new Error( diff --git a/packages/react-native-executorch/src/native/NativeLLM.ts b/packages/react-native-executorch/src/native/NativeLLM.ts deleted file mode 100644 index e89ba01f53..0000000000 --- a/packages/react-native-executorch/src/native/NativeLLM.ts +++ /dev/null @@ -1,14 +0,0 @@ -import type { TurboModule } from 'react-native'; -import { TurboModuleRegistry } from 'react-native'; -import type { EventEmitter } from 'react-native/Libraries/Types/CodegenTypes'; - -export interface Spec extends TurboModule { - 
loadLLM(modelSource: string, tokenizerSource: string): Promise; - forward(input: string): Promise; - interrupt(): void; - releaseResources(): void; - - readonly onToken: EventEmitter; -} - -export default TurboModuleRegistry.get('LLM'); diff --git a/packages/react-native-executorch/src/native/RnExecutorchModules.ts b/packages/react-native-executorch/src/native/RnExecutorchModules.ts index 92368785b7..3cf4a10bbb 100644 --- a/packages/react-native-executorch/src/native/RnExecutorchModules.ts +++ b/packages/react-native-executorch/src/native/RnExecutorchModules.ts @@ -1,5 +1,4 @@ import { Platform } from 'react-native'; -import { Spec as LLMInterface } from './NativeLLM'; import { Spec as ETInstallerInterface } from './NativeETInstaller'; const LINKING_ERROR = @@ -23,8 +22,5 @@ function returnSpecOrThrowLinkingError(spec: any) { const ETInstallerNativeModule: ETInstallerInterface = returnSpecOrThrowLinkingError(require('./NativeETInstaller').default); -const LLMNativeModule: LLMInterface = returnSpecOrThrowLinkingError( - require('./NativeLLM').default -); -export { LLMNativeModule, ETInstallerNativeModule }; +export { ETInstallerNativeModule }; diff --git a/packages/react-native-executorch/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so b/packages/react-native-executorch/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so new file mode 100755 index 0000000000..c97092dbce Binary files /dev/null and b/packages/react-native-executorch/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so differ diff --git a/packages/react-native-executorch/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so b/packages/react-native-executorch/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so new file mode 100755 index 0000000000..5b144aaa8c Binary files /dev/null and b/packages/react-native-executorch/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so differ diff --git 
a/packages/react-native-executorch/third-party/include/cpuinfo/cpuinfo.h b/packages/react-native-executorch/third-party/include/cpuinfo/cpuinfo.h new file mode 100644 index 0000000000..e2e1410c57 --- /dev/null +++ b/packages/react-native-executorch/third-party/include/cpuinfo/cpuinfo.h @@ -0,0 +1,2305 @@ +#pragma once +#ifndef CPUINFO_H +#define CPUINFO_H + +#ifndef __cplusplus +#include +#endif + +#ifdef __APPLE__ +#include +#endif + +#include + +/* Identify architecture and define corresponding macro */ + +#if defined(__i386__) || defined(__i486__) || defined(__i586__) || \ + defined(__i686__) || defined(_M_IX86) +#define CPUINFO_ARCH_X86 1 +#endif + +#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || \ + defined(_M_AMD64) +#define CPUINFO_ARCH_X86_64 1 +#endif + +#if defined(__arm__) || defined(_M_ARM) +#define CPUINFO_ARCH_ARM 1 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#define CPUINFO_ARCH_ARM64 1 +#endif + +#if defined(__PPC64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) +#define CPUINFO_ARCH_PPC64 1 +#endif + +#if defined(__asmjs__) +#define CPUINFO_ARCH_ASMJS 1 +#endif + +#if defined(__wasm__) +#if defined(__wasm_simd128__) +#define CPUINFO_ARCH_WASMSIMD 1 +#else +#define CPUINFO_ARCH_WASM 1 +#endif +#endif + +#if defined(__riscv) +#if (__riscv_xlen == 32) +#define CPUINFO_ARCH_RISCV32 1 +#elif (__riscv_xlen == 64) +#define CPUINFO_ARCH_RISCV64 1 +#endif +#endif + +/* Define other architecture-specific macros as 0 */ + +#ifndef CPUINFO_ARCH_X86 +#define CPUINFO_ARCH_X86 0 +#endif + +#ifndef CPUINFO_ARCH_X86_64 +#define CPUINFO_ARCH_X86_64 0 +#endif + +#ifndef CPUINFO_ARCH_ARM +#define CPUINFO_ARCH_ARM 0 +#endif + +#ifndef CPUINFO_ARCH_ARM64 +#define CPUINFO_ARCH_ARM64 0 +#endif + +#ifndef CPUINFO_ARCH_PPC64 +#define CPUINFO_ARCH_PPC64 0 +#endif + +#ifndef CPUINFO_ARCH_ASMJS +#define CPUINFO_ARCH_ASMJS 0 +#endif + +#ifndef CPUINFO_ARCH_WASM +#define CPUINFO_ARCH_WASM 0 +#endif + +#ifndef CPUINFO_ARCH_WASMSIMD +#define 
CPUINFO_ARCH_WASMSIMD 0 +#endif + +#ifndef CPUINFO_ARCH_RISCV32 +#define CPUINFO_ARCH_RISCV32 0 +#endif + +#ifndef CPUINFO_ARCH_RISCV64 +#define CPUINFO_ARCH_RISCV64 0 +#endif + +#if CPUINFO_ARCH_X86 && defined(_MSC_VER) +#define CPUINFO_ABI __cdecl +#elif CPUINFO_ARCH_X86 && defined(__GNUC__) +#define CPUINFO_ABI __attribute__((__cdecl__)) +#else +#define CPUINFO_ABI +#endif + +#define CPUINFO_CACHE_UNIFIED 0x00000001 +#define CPUINFO_CACHE_INCLUSIVE 0x00000002 +#define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004 + +struct cpuinfo_cache { + /** Cache size in bytes */ + uint32_t size; + /** Number of ways of associativity */ + uint32_t associativity; + /** Number of sets */ + uint32_t sets; + /** Number of partitions */ + uint32_t partitions; + /** Line size in bytes */ + uint32_t line_size; + /** + * Binary characteristics of the cache (unified cache, inclusive cache, + * cache with complex indexing). + * + * @see CPUINFO_CACHE_UNIFIED, CPUINFO_CACHE_INCLUSIVE, + * CPUINFO_CACHE_COMPLEX_INDEXING + */ + uint32_t flags; + /** Index of the first logical processor that shares this cache */ + uint32_t processor_start; + /** Number of logical processors that share this cache */ + uint32_t processor_count; +}; + +struct cpuinfo_trace_cache { + uint32_t uops; + uint32_t associativity; +}; + +#define CPUINFO_PAGE_SIZE_4KB 0x1000 +#define CPUINFO_PAGE_SIZE_1MB 0x100000 +#define CPUINFO_PAGE_SIZE_2MB 0x200000 +#define CPUINFO_PAGE_SIZE_4MB 0x400000 +#define CPUINFO_PAGE_SIZE_16MB 0x1000000 +#define CPUINFO_PAGE_SIZE_1GB 0x40000000 + +struct cpuinfo_tlb { + uint32_t entries; + uint32_t associativity; + uint64_t pages; +}; + +/** Vendor of processor core design */ +enum cpuinfo_vendor { + /** Processor vendor is not known to the library, or the library failed + to get vendor information from the OS. */ + cpuinfo_vendor_unknown = 0, + + /* Active vendors of modern CPUs */ + + /** + * Intel Corporation. Vendor of x86, x86-64, IA64, and ARM processor + * microarchitectures. 
+ * + * Sold its ARM design subsidiary in 2006. The last ARM processor design + * was released in 2004. + */ + cpuinfo_vendor_intel = 1, + /** Advanced Micro Devices, Inc. Vendor of x86 and x86-64 processor + microarchitectures. */ + cpuinfo_vendor_amd = 2, + /** ARM Holdings plc. Vendor of ARM and ARM64 processor + microarchitectures. */ + cpuinfo_vendor_arm = 3, + /** Qualcomm Incorporated. Vendor of ARM and ARM64 processor + microarchitectures. */ + cpuinfo_vendor_qualcomm = 4, + /** Apple Inc. Vendor of ARM and ARM64 processor microarchitectures. */ + cpuinfo_vendor_apple = 5, + /** Samsung Electronics Co., Ltd. Vendir if ARM64 processor + microarchitectures. */ + cpuinfo_vendor_samsung = 6, + /** Nvidia Corporation. Vendor of ARM64-compatible processor + microarchitectures. */ + cpuinfo_vendor_nvidia = 7, + /** MIPS Technologies, Inc. Vendor of MIPS processor microarchitectures. + */ + cpuinfo_vendor_mips = 8, + /** International Business Machines Corporation. Vendor of PowerPC + processor microarchitectures. */ + cpuinfo_vendor_ibm = 9, + /** Ingenic Semiconductor. Vendor of MIPS processor microarchitectures. + */ + cpuinfo_vendor_ingenic = 10, + /** + * VIA Technologies, Inc. Vendor of x86 and x86-64 processor + * microarchitectures. + * + * Processors are designed by Centaur Technology, a subsidiary of VIA + * Technologies. + */ + cpuinfo_vendor_via = 11, + /** Cavium, Inc. Vendor of ARM64 processor microarchitectures. */ + cpuinfo_vendor_cavium = 12, + /** Broadcom, Inc. Vendor of ARM processor microarchitectures. */ + cpuinfo_vendor_broadcom = 13, + /** Applied Micro Circuits Corporation (APM). Vendor of ARM64 processor + microarchitectures. */ + cpuinfo_vendor_apm = 14, + /** + * Huawei Technologies Co., Ltd. Vendor of ARM64 processor + * microarchitectures. + * + * Processors are designed by HiSilicon, a subsidiary of Huawei. 
+ */ + cpuinfo_vendor_huawei = 15, + /** + * Hygon (Chengdu Haiguang Integrated Circuit Design Co., Ltd), Vendor + * of x86-64 processor microarchitectures. + * + * Processors are variants of AMD cores. + */ + cpuinfo_vendor_hygon = 16, + /** SiFive, Inc. Vendor of RISC-V processor microarchitectures. */ + cpuinfo_vendor_sifive = 17, + + /* Active vendors of embedded CPUs */ + + /** Texas Instruments Inc. Vendor of ARM processor microarchitectures. + */ + cpuinfo_vendor_texas_instruments = 30, + /** Marvell Technology Group Ltd. Vendor of ARM processor + * microarchitectures. + */ + cpuinfo_vendor_marvell = 31, + /** RDC Semiconductor Co., Ltd. Vendor of x86 processor + microarchitectures. */ + cpuinfo_vendor_rdc = 32, + /** DM&P Electronics Inc. Vendor of x86 processor microarchitectures. */ + cpuinfo_vendor_dmp = 33, + /** Motorola, Inc. Vendor of PowerPC and ARM processor + microarchitectures. */ + cpuinfo_vendor_motorola = 34, + + /* Defunct CPU vendors */ + + /** + * Transmeta Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 2004. + * Transmeta processors implemented VLIW ISA and used binary translation + * to execute x86 code. + */ + cpuinfo_vendor_transmeta = 50, + /** + * Cyrix Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1996. + */ + cpuinfo_vendor_cyrix = 51, + /** + * Rise Technology. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1999. + */ + cpuinfo_vendor_rise = 52, + /** + * National Semiconductor. Vendor of x86 processor microarchitectures. + * + * Sold its x86 design subsidiary in 1999. The last processor design was + * released in 1998. + */ + cpuinfo_vendor_nsc = 53, + /** + * Silicon Integrated Systems. Vendor of x86 processor + * microarchitectures. + * + * Sold its x86 design subsidiary in 2001. 
The last processor design was + * released in 2001. + */ + cpuinfo_vendor_sis = 54, + /** + * NexGen. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1994. + * NexGen designed the first x86 microarchitecture which decomposed x86 + * instructions into simple microoperations. + */ + cpuinfo_vendor_nexgen = 55, + /** + * United Microelectronics Corporation. Vendor of x86 processor + * microarchitectures. + * + * Ceased x86 in the early 1990s. The last processor design was released + * in 1991. Designed U5C and U5D processors. Both are 486 level. + */ + cpuinfo_vendor_umc = 56, + /** + * Digital Equipment Corporation. Vendor of ARM processor + * microarchitecture. + * + * Sold its ARM designs in 1997. The last processor design was released + * in 1997. + */ + cpuinfo_vendor_dec = 57, +}; + +/** + * Processor microarchitecture + * + * Processors with different microarchitectures often have different instruction + * performance characteristics, and may have dramatically different pipeline + * organization. + */ +enum cpuinfo_uarch { + /** Microarchitecture is unknown, or the library failed to get + information about the microarchitecture from OS */ + cpuinfo_uarch_unknown = 0, + + /** Pentium and Pentium MMX microarchitecture. */ + cpuinfo_uarch_p5 = 0x00100100, + /** Intel Quark microarchitecture. */ + cpuinfo_uarch_quark = 0x00100101, + + /** Pentium Pro, Pentium II, and Pentium III. */ + cpuinfo_uarch_p6 = 0x00100200, + /** Pentium M. */ + cpuinfo_uarch_dothan = 0x00100201, + /** Intel Core microarchitecture. */ + cpuinfo_uarch_yonah = 0x00100202, + /** Intel Core 2 microarchitecture on 65 nm process. */ + cpuinfo_uarch_conroe = 0x00100203, + /** Intel Core 2 microarchitecture on 45 nm process. */ + cpuinfo_uarch_penryn = 0x00100204, + /** Intel Nehalem and Westmere microarchitectures (Core i3/i5/i7 1st + gen). 
*/ + cpuinfo_uarch_nehalem = 0x00100205, + /** Intel Sandy Bridge microarchitecture (Core i3/i5/i7 2nd gen). */ + cpuinfo_uarch_sandy_bridge = 0x00100206, + /** Intel Ivy Bridge microarchitecture (Core i3/i5/i7 3rd gen). */ + cpuinfo_uarch_ivy_bridge = 0x00100207, + /** Intel Haswell microarchitecture (Core i3/i5/i7 4th gen). */ + cpuinfo_uarch_haswell = 0x00100208, + /** Intel Broadwell microarchitecture. */ + cpuinfo_uarch_broadwell = 0x00100209, + /** Intel Sky Lake microarchitecture (14 nm, including + Kaby/Coffee/Whiskey/Amber/Comet/Cascade/Cooper Lake). */ + cpuinfo_uarch_sky_lake = 0x0010020A, + /** DEPRECATED (Intel Kaby Lake microarchitecture). */ + cpuinfo_uarch_kaby_lake = 0x0010020A, + /** Intel Palm Cove microarchitecture (10 nm, Cannon Lake). */ + cpuinfo_uarch_palm_cove = 0x0010020B, + /** Intel Sunny Cove microarchitecture (10 nm, Ice Lake). */ + cpuinfo_uarch_sunny_cove = 0x0010020C, + + /** Pentium 4 with Willamette, Northwood, or Foster cores. */ + cpuinfo_uarch_willamette = 0x00100300, + /** Pentium 4 with Prescott and later cores. */ + cpuinfo_uarch_prescott = 0x00100301, + + /** Intel Atom on 45 nm process. */ + cpuinfo_uarch_bonnell = 0x00100400, + /** Intel Atom on 32 nm process. */ + cpuinfo_uarch_saltwell = 0x00100401, + /** Intel Silvermont microarchitecture (22 nm out-of-order Atom). */ + cpuinfo_uarch_silvermont = 0x00100402, + /** Intel Airmont microarchitecture (14 nm out-of-order Atom). */ + cpuinfo_uarch_airmont = 0x00100403, + /** Intel Goldmont microarchitecture (Denverton, Apollo Lake). */ + cpuinfo_uarch_goldmont = 0x00100404, + /** Intel Goldmont Plus microarchitecture (Gemini Lake). */ + cpuinfo_uarch_goldmont_plus = 0x00100405, + + /** Intel Knights Ferry HPC boards. */ + cpuinfo_uarch_knights_ferry = 0x00100500, + /** Intel Knights Corner HPC boards (aka Xeon Phi). */ + cpuinfo_uarch_knights_corner = 0x00100501, + /** Intel Knights Landing microarchitecture (second-gen MIC). 
*/ + cpuinfo_uarch_knights_landing = 0x00100502, + /** Intel Knights Hill microarchitecture (third-gen MIC). */ + cpuinfo_uarch_knights_hill = 0x00100503, + /** Intel Knights Mill Xeon Phi. */ + cpuinfo_uarch_knights_mill = 0x00100504, + + /** Intel/Marvell XScale series. */ + cpuinfo_uarch_xscale = 0x00100600, + + /** AMD K5. */ + cpuinfo_uarch_k5 = 0x00200100, + /** AMD K6 and alike. */ + cpuinfo_uarch_k6 = 0x00200101, + /** AMD Athlon and Duron. */ + cpuinfo_uarch_k7 = 0x00200102, + /** AMD Athlon 64, Opteron 64. */ + cpuinfo_uarch_k8 = 0x00200103, + /** AMD Family 10h (Barcelona, Istambul, Magny-Cours). */ + cpuinfo_uarch_k10 = 0x00200104, + /** + * AMD Bulldozer microarchitecture + * Zambezi FX-series CPUs, Zurich, Valencia and Interlagos Opteron CPUs. + */ + cpuinfo_uarch_bulldozer = 0x00200105, + /** + * AMD Piledriver microarchitecture + * Vishera FX-series CPUs, Trinity and Richland APUs, Delhi, Seoul, Abu + * Dhabi Opteron CPUs. + */ + cpuinfo_uarch_piledriver = 0x00200106, + /** AMD Steamroller microarchitecture (Kaveri APUs). */ + cpuinfo_uarch_steamroller = 0x00200107, + /** AMD Excavator microarchitecture (Carizzo APUs). */ + cpuinfo_uarch_excavator = 0x00200108, + /** AMD Zen microarchitecture (12/14 nm Ryzen and EPYC CPUs). */ + cpuinfo_uarch_zen = 0x00200109, + /** AMD Zen 2 microarchitecture (7 nm Ryzen and EPYC CPUs). */ + cpuinfo_uarch_zen2 = 0x0020010A, + /** AMD Zen 3 microarchitecture. */ + cpuinfo_uarch_zen3 = 0x0020010B, + /** AMD Zen 4 microarchitecture. */ + cpuinfo_uarch_zen4 = 0x0020010C, + + /** NSC Geode and AMD Geode GX and LX. */ + cpuinfo_uarch_geode = 0x00200200, + /** AMD Bobcat mobile microarchitecture. */ + cpuinfo_uarch_bobcat = 0x00200201, + /** AMD Jaguar mobile microarchitecture. */ + cpuinfo_uarch_jaguar = 0x00200202, + /** AMD Puma mobile microarchitecture. */ + cpuinfo_uarch_puma = 0x00200203, + + /** ARM7 series. */ + cpuinfo_uarch_arm7 = 0x00300100, + /** ARM9 series. 
*/ + cpuinfo_uarch_arm9 = 0x00300101, + /** ARM 1136, ARM 1156, ARM 1176, or ARM 11MPCore. */ + cpuinfo_uarch_arm11 = 0x00300102, + + /** ARM Cortex-A5. */ + cpuinfo_uarch_cortex_a5 = 0x00300205, + /** ARM Cortex-A7. */ + cpuinfo_uarch_cortex_a7 = 0x00300207, + /** ARM Cortex-A8. */ + cpuinfo_uarch_cortex_a8 = 0x00300208, + /** ARM Cortex-A9. */ + cpuinfo_uarch_cortex_a9 = 0x00300209, + /** ARM Cortex-A12. */ + cpuinfo_uarch_cortex_a12 = 0x00300212, + /** ARM Cortex-A15. */ + cpuinfo_uarch_cortex_a15 = 0x00300215, + /** ARM Cortex-A17. */ + cpuinfo_uarch_cortex_a17 = 0x00300217, + + /** ARM Cortex-A32. */ + cpuinfo_uarch_cortex_a32 = 0x00300332, + /** ARM Cortex-A35. */ + cpuinfo_uarch_cortex_a35 = 0x00300335, + /** ARM Cortex-A53. */ + cpuinfo_uarch_cortex_a53 = 0x00300353, + /** ARM Cortex-A55 revision 0 (restricted dual-issue capabilities + compared to revision 1+). */ + cpuinfo_uarch_cortex_a55r0 = 0x00300354, + /** ARM Cortex-A55. */ + cpuinfo_uarch_cortex_a55 = 0x00300355, + /** ARM Cortex-A57. */ + cpuinfo_uarch_cortex_a57 = 0x00300357, + /** ARM Cortex-A65. */ + cpuinfo_uarch_cortex_a65 = 0x00300365, + /** ARM Cortex-A72. */ + cpuinfo_uarch_cortex_a72 = 0x00300372, + /** ARM Cortex-A73. */ + cpuinfo_uarch_cortex_a73 = 0x00300373, + /** ARM Cortex-A75. */ + cpuinfo_uarch_cortex_a75 = 0x00300375, + /** ARM Cortex-A76. */ + cpuinfo_uarch_cortex_a76 = 0x00300376, + /** ARM Cortex-A77. */ + cpuinfo_uarch_cortex_a77 = 0x00300377, + /** ARM Cortex-A78. */ + cpuinfo_uarch_cortex_a78 = 0x00300378, + + /** ARM Neoverse N1. */ + cpuinfo_uarch_neoverse_n1 = 0x00300400, + /** ARM Neoverse E1. */ + cpuinfo_uarch_neoverse_e1 = 0x00300401, + /** ARM Neoverse V1. */ + cpuinfo_uarch_neoverse_v1 = 0x00300402, + /** ARM Neoverse N2. */ + cpuinfo_uarch_neoverse_n2 = 0x00300403, + /** ARM Neoverse V2. */ + cpuinfo_uarch_neoverse_v2 = 0x00300404, + + /** ARM Cortex-X1. */ + cpuinfo_uarch_cortex_x1 = 0x00300501, + /** ARM Cortex-X2. 
*/ + cpuinfo_uarch_cortex_x2 = 0x00300502, + /** ARM Cortex-X3. */ + cpuinfo_uarch_cortex_x3 = 0x00300503, + /** ARM Cortex-X4. */ + cpuinfo_uarch_cortex_x4 = 0x00300504, + + /** ARM Cortex-A510. */ + cpuinfo_uarch_cortex_a510 = 0x00300551, + /** ARM Cortex-A520. */ + cpuinfo_uarch_cortex_a520 = 0x00300552, + /** ARM Cortex-A710. */ + cpuinfo_uarch_cortex_a710 = 0x00300571, + /** ARM Cortex-A715. */ + cpuinfo_uarch_cortex_a715 = 0x00300572, + /** ARM Cortex-A720. */ + cpuinfo_uarch_cortex_a720 = 0x00300573, + + /** Qualcomm Scorpion. */ + cpuinfo_uarch_scorpion = 0x00400100, + /** Qualcomm Krait. */ + cpuinfo_uarch_krait = 0x00400101, + /** Qualcomm Kryo. */ + cpuinfo_uarch_kryo = 0x00400102, + /** Qualcomm Falkor. */ + cpuinfo_uarch_falkor = 0x00400103, + /** Qualcomm Saphira. */ + cpuinfo_uarch_saphira = 0x00400104, + + /** Nvidia Denver. */ + cpuinfo_uarch_denver = 0x00500100, + /** Nvidia Denver 2. */ + cpuinfo_uarch_denver2 = 0x00500101, + /** Nvidia Carmel. */ + cpuinfo_uarch_carmel = 0x00500102, + + /** Samsung Exynos M1 (Exynos 8890 big cores). */ + cpuinfo_uarch_exynos_m1 = 0x00600100, + /** Samsung Exynos M2 (Exynos 8895 big cores). */ + cpuinfo_uarch_exynos_m2 = 0x00600101, + /** Samsung Exynos M3 (Exynos 9810 big cores). */ + cpuinfo_uarch_exynos_m3 = 0x00600102, + /** Samsung Exynos M4 (Exynos 9820 big cores). */ + cpuinfo_uarch_exynos_m4 = 0x00600103, + /** Samsung Exynos M5 (Exynos 9830 big cores). */ + cpuinfo_uarch_exynos_m5 = 0x00600104, + + /* Deprecated synonym for Cortex-A76 */ + cpuinfo_uarch_cortex_a76ae = 0x00300376, + /* Deprecated names for Exynos. */ + cpuinfo_uarch_mongoose_m1 = 0x00600100, + cpuinfo_uarch_mongoose_m2 = 0x00600101, + cpuinfo_uarch_meerkat_m3 = 0x00600102, + cpuinfo_uarch_meerkat_m4 = 0x00600103, + + /** Apple A6 and A6X processors. */ + cpuinfo_uarch_swift = 0x00700100, + /** Apple A7 processor. */ + cpuinfo_uarch_cyclone = 0x00700101, + /** Apple A8 and A8X processor. 
*/ + cpuinfo_uarch_typhoon = 0x00700102, + /** Apple A9 and A9X processor. */ + cpuinfo_uarch_twister = 0x00700103, + /** Apple A10 and A10X processor. */ + cpuinfo_uarch_hurricane = 0x00700104, + /** Apple A11 processor (big cores). */ + cpuinfo_uarch_monsoon = 0x00700105, + /** Apple A11 processor (little cores). */ + cpuinfo_uarch_mistral = 0x00700106, + /** Apple A12 processor (big cores). */ + cpuinfo_uarch_vortex = 0x00700107, + /** Apple A12 processor (little cores). */ + cpuinfo_uarch_tempest = 0x00700108, + /** Apple A13 processor (big cores). */ + cpuinfo_uarch_lightning = 0x00700109, + /** Apple A13 processor (little cores). */ + cpuinfo_uarch_thunder = 0x0070010A, + /** Apple A14 / M1 processor (big cores). */ + cpuinfo_uarch_firestorm = 0x0070010B, + /** Apple A14 / M1 processor (little cores). */ + cpuinfo_uarch_icestorm = 0x0070010C, + /** Apple A15 / M2 processor (big cores). */ + cpuinfo_uarch_avalanche = 0x0070010D, + /** Apple A15 / M2 processor (little cores). */ + cpuinfo_uarch_blizzard = 0x0070010E, + + /** Cavium ThunderX. */ + cpuinfo_uarch_thunderx = 0x00800100, + /** Cavium ThunderX2 (originally Broadcom Vulkan). */ + cpuinfo_uarch_thunderx2 = 0x00800200, + + /** Marvell PJ4. */ + cpuinfo_uarch_pj4 = 0x00900100, + + /** Broadcom Brahma B15. */ + cpuinfo_uarch_brahma_b15 = 0x00A00100, + /** Broadcom Brahma B53. */ + cpuinfo_uarch_brahma_b53 = 0x00A00101, + + /** Applied Micro X-Gene. */ + cpuinfo_uarch_xgene = 0x00B00100, + + /* Hygon Dhyana (a modification of AMD Zen for Chinese market). */ + cpuinfo_uarch_dhyana = 0x01000100, + + /** HiSilicon TaiShan v110 (Huawei Kunpeng 920 series processors). 
*/ + cpuinfo_uarch_taishan_v110 = 0x00C00100, +}; + +struct cpuinfo_processor { + /** SMT (hyperthread) ID within a core */ + uint32_t smt_id; + /** Core containing this logical processor */ + const struct cpuinfo_core *core; + /** Cluster of cores containing this logical processor */ + const struct cpuinfo_cluster *cluster; + /** Physical package containing this logical processor */ + const struct cpuinfo_package *package; +#if defined(__linux__) + /** + * Linux-specific ID for the logical processor: + * - Linux kernel exposes information about this logical processor in + * /sys/devices/system/cpu/cpu/ + * - Bit in the cpu_set_t identifies this logical processor + */ + int linux_id; +#endif +#if defined(_WIN32) || defined(__CYGWIN__) + /** Windows-specific ID for the group containing the logical processor. + */ + uint16_t windows_group_id; + /** + * Windows-specific ID of the logical processor within its group: + * - Bit in the KAFFINITY mask identifies this + * logical processor within its group. + */ + uint16_t windows_processor_id; +#endif +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** APIC ID (unique x86-specific ID of the logical processor) */ + uint32_t apic_id; +#endif + struct { + /** Level 1 instruction cache */ + const struct cpuinfo_cache *l1i; + /** Level 1 data cache */ + const struct cpuinfo_cache *l1d; + /** Level 2 unified or data cache */ + const struct cpuinfo_cache *l2; + /** Level 3 unified or data cache */ + const struct cpuinfo_cache *l3; + /** Level 4 unified or data cache */ + const struct cpuinfo_cache *l4; + } cache; +}; + +struct cpuinfo_core { + /** Index of the first logical processor on this core. */ + uint32_t processor_start; + /** Number of logical processors on this core */ + uint32_t processor_count; + /** Core ID within a package */ + uint32_t core_id; + /** Cluster containing this core */ + const struct cpuinfo_cluster *cluster; + /** Physical package containing this core. 
*/ + const struct cpuinfo_package *package; + /** Vendor of the CPU microarchitecture for this core */ + enum cpuinfo_vendor vendor; + /** CPU microarchitecture for this core */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register for this core */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) for this core */ + uint32_t midr; +#endif + /** Clock rate (non-Turbo) of the core, in Hz */ + uint64_t frequency; +}; + +struct cpuinfo_cluster { + /** Index of the first logical processor in the cluster */ + uint32_t processor_start; + /** Number of logical processors in the cluster */ + uint32_t processor_count; + /** Index of the first core in the cluster */ + uint32_t core_start; + /** Number of cores on the cluster */ + uint32_t core_count; + /** Cluster ID within a package */ + uint32_t cluster_id; + /** Physical package containing the cluster */ + const struct cpuinfo_package *package; + /** CPU microarchitecture vendor of the cores in the cluster */ + enum cpuinfo_vendor vendor; + /** CPU microarchitecture of the cores in the cluster */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register of the cores in the cluster */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) of the cores in the cluster */ + uint32_t midr; +#endif + /** Clock rate (non-Turbo) of the cores in the cluster, in Hz */ + uint64_t frequency; +}; + +#define CPUINFO_PACKAGE_NAME_MAX 48 + +struct cpuinfo_package { + /** SoC or processor chip model name */ + char name[CPUINFO_PACKAGE_NAME_MAX]; + /** Index of the first logical processor on this physical package */ + uint32_t processor_start; + /** Number of logical processors on this physical package */ + uint32_t processor_count; + /** Index of the first core on this physical package */ + uint32_t core_start; + /** Number of 
cores on this physical package */ + uint32_t core_count; + /** Index of the first cluster of cores on this physical package */ + uint32_t cluster_start; + /** Number of clusters of cores on this physical package */ + uint32_t cluster_count; +}; + +struct cpuinfo_uarch_info { + /** Type of CPU microarchitecture */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register for the microarchitecture */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) for the microarchitecture */ + uint32_t midr; +#endif + /** Number of logical processors with the microarchitecture */ + uint32_t processor_count; + /** Number of cores with the microarchitecture */ + uint32_t core_count; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +bool CPUINFO_ABI cpuinfo_initialize(void); + +void CPUINFO_ABI cpuinfo_deinitialize(void); + +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 +/* This structure is not a part of stable API. Use cpuinfo_has_x86_* functions + * instead. 
*/ +struct cpuinfo_x86_isa { +#if CPUINFO_ARCH_X86 + bool rdtsc; +#endif + bool rdtscp; + bool rdpid; + bool sysenter; +#if CPUINFO_ARCH_X86 + bool syscall; +#endif + bool msr; + bool clzero; + bool clflush; + bool clflushopt; + bool mwait; + bool mwaitx; +#if CPUINFO_ARCH_X86 + bool emmx; +#endif + bool fxsave; + bool xsave; +#if CPUINFO_ARCH_X86 + bool fpu; + bool mmx; + bool mmx_plus; +#endif + bool three_d_now; + bool three_d_now_plus; +#if CPUINFO_ARCH_X86 + bool three_d_now_geode; +#endif + bool prefetch; + bool prefetchw; + bool prefetchwt1; +#if CPUINFO_ARCH_X86 + bool daz; + bool sse; + bool sse2; +#endif + bool sse3; + bool ssse3; + bool sse4_1; + bool sse4_2; + bool sse4a; + bool misaligned_sse; + bool avx; + bool avxvnni; + bool fma3; + bool fma4; + bool xop; + bool f16c; + bool avx2; + bool avx512f; + bool avx512pf; + bool avx512er; + bool avx512cd; + bool avx512dq; + bool avx512bw; + bool avx512vl; + bool avx512ifma; + bool avx512vbmi; + bool avx512vbmi2; + bool avx512bitalg; + bool avx512vpopcntdq; + bool avx512vnni; + bool avx512bf16; + bool avx512fp16; + bool avx512vp2intersect; + bool avx512_4vnniw; + bool avx512_4fmaps; + bool amx_bf16; + bool amx_tile; + bool amx_int8; + bool amx_fp16; + bool avx_vnni_int8; + bool avx_vnni_int16; + bool avx_ne_convert; + bool hle; + bool rtm; + bool xtest; + bool mpx; +#if CPUINFO_ARCH_X86 + bool cmov; + bool cmpxchg8b; +#endif + bool cmpxchg16b; + bool clwb; + bool movbe; +#if CPUINFO_ARCH_X86_64 + bool lahf_sahf; +#endif + bool fs_gs_base; + bool lzcnt; + bool popcnt; + bool tbm; + bool bmi; + bool bmi2; + bool adx; + bool aes; + bool vaes; + bool pclmulqdq; + bool vpclmulqdq; + bool gfni; + bool rdrand; + bool rdseed; + bool sha; + bool rng; + bool ace; + bool ace2; + bool phe; + bool pmm; + bool lwp; +}; + +extern struct cpuinfo_x86_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_x86_rdtsc(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return 
true; +#else + return cpuinfo_isa.rdtsc; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdtscp(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdtscp; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdpid(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdpid; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_clzero(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.clzero; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mwait(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mwait; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mwaitx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mwaitx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fxsave(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fxsave; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_xsave(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xsave; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fpu(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.fpu; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mmx(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.mmx; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mmx_plus(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.mmx_plus; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_3dnow(void) { +#if CPUINFO_ARCH_X86 || 
CPUINFO_ARCH_X86_64 + return cpuinfo_isa.three_d_now; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_3dnow_plus(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.three_d_now_plus; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_3dnow_geode(void) { +#if CPUINFO_ARCH_X86_64 + return false; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return false; +#else + return cpuinfo_isa.three_d_now_geode; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_prefetch(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetch; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_prefetchw(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetchw; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_prefetchwt1(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetchwt1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_daz(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.daz; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse2(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse2; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse3(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse3; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_ssse3(void) { +#if CPUINFO_ARCH_X86 
|| CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.ssse3; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse4_1(void) { +#if CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse4_1; +#endif +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.sse4_1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse4_2(void) { +#if CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse4_2; +#endif +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.sse4_2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse4a(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.sse4a; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_misaligned_sse(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.misaligned_sse; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avxvnni(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avxvnni; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fma3(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fma3; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fma4(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fma4; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_xop(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xop; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_f16c(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.f16c; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx2(void) { +#if CPUINFO_ARCH_X86 || 
CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512f(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512f; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512pf(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512pf; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512er(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512er; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512cd(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512cd; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512dq(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512dq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512bw(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bw; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vl(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vl; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512ifma(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512ifma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vbmi(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vbmi; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vbmi2(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vbmi2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512bitalg(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bitalg; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vpopcntdq(void) { +#if CPUINFO_ARCH_X86 
|| CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vpopcntdq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vnni(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vnni; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512bf16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512fp16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512fp16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vp2intersect(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vp2intersect; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512_4vnniw(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512_4vnniw; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512_4fmaps(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512_4fmaps; +#else + return false; +#endif +} + +/* [NOTE] Intel Advanced Matrix Extensions (AMX) detection + * + * I. AMX is a new extensions to the x86 ISA to work on matrices, consists of + * 1) 2-dimentional registers (tiles), hold sub-matrices from larger matrices + * in memory 2) Accelerator called Tile Matrix Multiply (TMUL), contains + * instructions operating on tiles + * + * II. 
Platforms that supports AMX: + * +-----------------+-----+----------+----------+----------+----------+ + * | Platforms | Gen | amx-bf16 | amx-tile | amx-int8 | amx-fp16 | + * +-----------------+-----+----------+----------+----------+----------+ + * | Sapphire Rapids | 4th | YES | YES | YES | NO | + * +-----------------+-----+----------+----------+----------+----------+ + * | Emerald Rapids | 5th | YES | YES | YES | NO | + * +-----------------+-----+----------+----------+----------+----------+ + * | Granite Rapids | 6th | YES | YES | YES | YES | + * +-----------------+-----+----------+----------+----------+----------+ + * + * Reference: https://www.intel.com/content/www/us/en/products/docs + * /accelerator-engines/advanced-matrix-extensions/overview.html + */ +static inline bool cpuinfo_has_x86_amx_bf16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_amx_tile(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_tile; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_amx_int8(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_int8; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_amx_fp16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_fp16; +#else + return false; +#endif +} + +/* + * Intel AVX Vector Neural Network Instructions (VNNI) INT8 + * Supported Platfroms: Sierra Forest, Arrow Lake, Lunar Lake + */ +static inline bool cpuinfo_has_x86_avx_vnni_int8(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx_vnni_int8; +#else + return false; +#endif +} + +/* + * Intel AVX Vector Neural Network Instructions (VNNI) INT16 + * Supported Platfroms: Arrow Lake, Lunar Lake + */ +static inline bool cpuinfo_has_x86_avx_vnni_int16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx_vnni_int16; 
+#else + return false; +#endif +} + +/* + * A new set of instructions, which can convert low precision floating point + * like BF16/FP16 to high precision floating point FP32, as well as convert FP32 + * elements to BF16. This instruction allows the platform to have improved AI + * capabilities and better compatibility. + * + * Supported Platforms: Sierra Forest, Arrow Lake, Lunar Lake + */ +static inline bool cpuinfo_has_x86_avx_ne_convert(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx_ne_convert; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_hle(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.hle; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rtm(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rtm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_xtest(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xtest; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mpx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mpx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_cmov(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.cmov; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_cmpxchg8b(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.cmpxchg8b; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_cmpxchg16b(void) { +#if CPUINFO_ARCH_X86_64 + return cpuinfo_isa.cmpxchg16b; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_clwb(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.clwb; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_movbe(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.movbe; +#else + 
return false; +#endif +} + +static inline bool cpuinfo_has_x86_lahf_sahf(void) { +#if CPUINFO_ARCH_X86 + return true; +#elif CPUINFO_ARCH_X86_64 + return cpuinfo_isa.lahf_sahf; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_lzcnt(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.lzcnt; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_popcnt(void) { +#if CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.popcnt; +#endif +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.popcnt; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_tbm(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.tbm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_bmi(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.bmi; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_bmi2(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.bmi2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_adx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.adx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_aes(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.aes; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_vaes(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.vaes; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_pclmulqdq(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.pclmulqdq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_vpclmulqdq(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.vpclmulqdq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_gfni(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return 
cpuinfo_isa.gfni; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdrand(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdrand; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdseed(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdseed; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sha(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.sha; +#else + return false; +#endif +} + +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +/* This structure is not a part of stable API. Use cpuinfo_has_arm_* functions + * instead. */ +struct cpuinfo_arm_isa { +#if CPUINFO_ARCH_ARM + bool thumb; + bool thumb2; + bool thumbee; + bool jazelle; + bool armv5e; + bool armv6; + bool armv6k; + bool armv7; + bool armv7mp; + bool armv8; + bool idiv; + + bool vfpv2; + bool vfpv3; + bool d32; + bool fp16; + bool fma; + + bool wmmx; + bool wmmx2; + bool neon; +#endif +#if CPUINFO_ARCH_ARM64 + bool atomics; + bool bf16; + bool sve; + bool sve2; + bool i8mm; + bool sme; + bool sme2; + bool sme2p1; + bool sme_i16i32; + bool sme_bi32i32; + bool sme_b16b16; + bool sme_f16f16; + uint32_t svelen; +#endif + bool rdm; + bool fp16arith; + bool dot; + bool jscvt; + bool fcma; + bool fhm; + + bool aes; + bool sha1; + bool sha2; + bool pmull; + bool crc32; +}; + +extern struct cpuinfo_arm_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_arm_thumb(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.thumb; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_thumb2(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.thumb2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v5e(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv5e; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v6(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv6; +#else + return false; +#endif +} + +static inline 
bool cpuinfo_has_arm_v6k(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv6k; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v7(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv7; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v7mp(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv7mp; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v8(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.armv8; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_idiv(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.idiv; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv2(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3_d32(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.d32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3_fp16(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fp16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3_fp16_d32(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fp16 && cpuinfo_isa.d32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv4(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv4_d32(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + 
return cpuinfo_isa.vfpv3 && cpuinfo_isa.fma && cpuinfo_isa.d32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_fp16_arith(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fp16arith; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_bf16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_wmmx(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.wmmx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_wmmx2(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.wmmx2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_fp16(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fp16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_fma(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_v8(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.armv8; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_atomics(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.atomics; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_rdm(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.rdm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_fp16_arith(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fp16arith; +#elif CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fp16arith; +#else + return false; +#endif +} + +static inline bool 
cpuinfo_has_arm_fhm(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fhm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_dot(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.dot; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_bf16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_jscvt(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.jscvt; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_fcma(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fcma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_i8mm(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.i8mm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_aes(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.aes; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sha1(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sha1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sha2(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sha2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_pmull(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.pmull; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_crc32(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.crc32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sve(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sve_bf16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve && cpuinfo_isa.bf16; +#else + return false; +#endif +} + +static inline bool 
cpuinfo_has_arm_sve2(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve2; +#else + return false; +#endif +} + +// Function to get the max SVE vector length on ARM CPU's which support SVE. +static inline uint32_t cpuinfo_get_max_arm_sve_length(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.svelen * 8; // bytes * 8 = bit length(vector length) +#else + return 0; +#endif +} + +static inline bool cpuinfo_has_arm_sme(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme2(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme2p1(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme2p1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_i16i32(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_i16i32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_bi32i32(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_bi32i32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_b16b16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_b16b16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_f16f16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_f16f16; +#else + return false; +#endif +} + +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 +/* This structure is not a part of stable API. Use cpuinfo_has_riscv_* functions + * instead. */ +struct cpuinfo_riscv_isa { + /** + * Keep fields in line with the canonical order as defined by + * Section 27.11 Subset Naming Convention. + */ + /* RV32I/64I/128I Base ISA. */ + bool i; +#if CPUINFO_ARCH_RISCV32 + /* RV32E Base ISA. */ + bool e; +#endif + /* Integer Multiply/Divide Extension. */ + bool m; + /* Atomic Extension. */ + bool a; + /* Single-Precision Floating-Point Extension. */ + bool f; + /* Double-Precision Floating-Point Extension. 
*/ + bool d; + /* Compressed Extension. */ + bool c; + /* Vector Extension. */ + bool v; +}; + +extern struct cpuinfo_riscv_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_riscv_i(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.i; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_e(void) { +#if CPUINFO_ARCH_RISCV32 + return cpuinfo_isa.e; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_m(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.m; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_a(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.a; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_f(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.f; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_d(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.d; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_g(void) { + // The 'G' extension is simply shorthand for 'IMAFD'. 
+ return cpuinfo_has_riscv_i() && cpuinfo_has_riscv_m() && + cpuinfo_has_riscv_a() && cpuinfo_has_riscv_f() && + cpuinfo_has_riscv_d(); +} + +static inline bool cpuinfo_has_riscv_c(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.c; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_v(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.v; +#else + return false; +#endif +} + +const struct cpuinfo_processor *CPUINFO_ABI cpuinfo_get_processors(void); +const struct cpuinfo_core *CPUINFO_ABI cpuinfo_get_cores(void); +const struct cpuinfo_cluster *CPUINFO_ABI cpuinfo_get_clusters(void); +const struct cpuinfo_package *CPUINFO_ABI cpuinfo_get_packages(void); +const struct cpuinfo_uarch_info *CPUINFO_ABI cpuinfo_get_uarchs(void); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l1i_caches(void); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l1d_caches(void); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l2_caches(void); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l3_caches(void); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l4_caches(void); + +const struct cpuinfo_processor *CPUINFO_ABI +cpuinfo_get_processor(uint32_t index); +const struct cpuinfo_core *CPUINFO_ABI cpuinfo_get_core(uint32_t index); +const struct cpuinfo_cluster *CPUINFO_ABI cpuinfo_get_cluster(uint32_t index); +const struct cpuinfo_package *CPUINFO_ABI cpuinfo_get_package(uint32_t index); +const struct cpuinfo_uarch_info *CPUINFO_ABI cpuinfo_get_uarch(uint32_t index); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index); + +uint32_t CPUINFO_ABI 
cpuinfo_get_processors_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_cores_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_clusters_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_packages_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_uarchs_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void); + +/** + * Returns upper bound on cache size. + */ +uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void); + +/** + * Identify the logical processor that executes the current thread. + * + * There is no guarantee that the thread will stay on the same logical processor + * for any time. Callers should treat the result as only a hint, and be prepared + * to handle NULL return value. + */ +const struct cpuinfo_processor *CPUINFO_ABI cpuinfo_get_current_processor(void); + +/** + * Identify the core that executes the current thread. + * + * There is no guarantee that the thread will stay on the same core for any + * time. Callers should treat the result as only a hint, and be prepared to + * handle NULL return value. + */ +const struct cpuinfo_core *CPUINFO_ABI cpuinfo_get_current_core(void); + +/** + * Identify the microarchitecture index of the core that executes the current + * thread. If the system does not support such identification, the function + * returns 0. + * + * There is no guarantee that the thread will stay on the same type of core for + * any time. Callers should treat the result as only a hint. + */ +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void); + +/** + * Identify the microarchitecture index of the core that executes the current + * thread. If the system does not support such identification, the function + * returns the user-specified default value. 
+ * + * There is no guarantee that the thread will stay on the same type of core for + * any time. Callers should treat the result as only a hint. + */ +uint32_t CPUINFO_ABI +cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* CPUINFO_H */ diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h new file mode 100644 index 0000000000..c00cc30a31 --- /dev/null +++ b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once +#if defined(__ANDROID__) && defined(__aarch64__) + +#include + +namespace executorch::extension::cpuinfo { + +uint32_t get_num_performant_cores(); + +} // namespace executorch::extension::cpuinfo + +namespace torch::executorch::cpuinfo { // DEPRECATED +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. Note that threadpool incorrectly used +// the namespace `torch::executorch` instead of `torch::executor`. 
+using ::executorch::extension::cpuinfo::get_num_performant_cores; // DEPRECATED +} // namespace torch::executorch::cpuinfo +#endif diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/threadpool.h b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/threadpool.h new file mode 100644 index 0000000000..f0dcc20a8c --- /dev/null +++ b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/threadpool.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once +#if defined(__ANDROID__) && defined(__aarch64__) + +#include +#include +#include + +#include + +namespace executorch::extension::threadpool { + +class ThreadPool final { +public: + explicit ThreadPool(size_t thread_count = 0); + ~ThreadPool() = default; + + // Make threadpool non copyable + // Non-copyable: threadpool cannot be copied because it will + // effectively require cloning of threadpool. + // Cloning can be done by just calling create_thread_pool. + ThreadPool(const ThreadPool &) = delete; + ThreadPool &operator=(const ThreadPool &) = delete; + + // Make threadpool non-movable. + ThreadPool(ThreadPool &&) = delete; + ThreadPool &operator=(ThreadPool &&) = delete; + + size_t get_thread_count() const; + + /** + * INTERNAL: Resets the threadpool by creating a new threadpool with requested + * # of threads. This is not a thread safe call. When calling this method, + * threads of the threadpool might be doing some work. Some other code may + * also be holding on to the threadpool pointer, that is no longer valid. 
This + * is a private API, which will later be replaced by something that allows + * creating of threadpool with requested size and use such a threadpool with + * backend delegates, custom ops or optimized lib. + */ + [[deprecated("This API is experimental and may change without notice.")]] + bool _unsafe_reset_threadpool(uint32_t num_threads); + + /** + * Run, in parallel, function fn(task_id) over task_id in range [0, range). + * This function is blocking. All input is processed by the time it returns. + * NoThreadPoolGuard (see threadpool_guard.h) can used to disable use of + * multiple threads with the scope of the guard When NoThreadPoolGuard is not + * used all calls to run method are serialized. + */ + void run(const std::function<void(size_t)> &fn, size_t range); + +private: + friend pthreadpool_t get_pthreadpool(); + +private: + // This mutex is used inside get_thread_count API but it is not really needed + // since data members of ThreadPool objects are not really mutable. + // TODO(kimishpatel): Figure out if we will allow set_num_threads API, in + // which case this mutex will be useful. Otherwise remove it. + mutable std::mutex mutex_; + std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> threadpool_; +}; + +/** + * Returns the singleton instance of ThreadPool for ATen/TH multithreading. + */ +ThreadPool *get_threadpool(); + +/** + * Returns the underlying pthreadpool instance used by the implementation of + * ThreadPool returned by `get_threadpool()`. Only for use in external libraries + * so as to unify threading across internal (i.e. ATen, etc.) and external (e.g. + * NNPACK, QNNPACK, XNNPACK) use cases. + */ +pthreadpool_t get_pthreadpool(); + +} // namespace executorch::extension::threadpool + +namespace torch::executorch::threadpool { // DEPRECATED +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. Note that threadpool incorrectly used +// the namespace `torch::executorch` instead of `torch::executor`. 
+using ::executorch::extension::threadpool::get_pthreadpool; // DEPRECATED +using ::executorch::extension::threadpool::get_threadpool; // DEPRECATED +using ::executorch::extension::threadpool::ThreadPool; // DEPRECATED +} // namespace torch::executorch::threadpool +#endif \ No newline at end of file diff --git a/packages/react-native-executorch/third-party/include/pthreadpool/pthreadpool.h b/packages/react-native-executorch/third-party/include/pthreadpool/pthreadpool.h new file mode 100644 index 0000000000..42d37657fc --- /dev/null +++ b/packages/react-native-executorch/third-party/include/pthreadpool/pthreadpool.h @@ -0,0 +1,2236 @@ +#ifndef PTHREADPOOL_H_ +#define PTHREADPOOL_H_ + +#include <stddef.h> +#include <stdint.h> + +typedef struct pthreadpool *pthreadpool_t; + +typedef void (*pthreadpool_task_1d_t)(void *, size_t); +typedef void (*pthreadpool_task_1d_with_thread_t)(void *, size_t, size_t); +typedef void (*pthreadpool_task_1d_tile_1d_t)(void *, size_t, size_t); +typedef void (*pthreadpool_task_2d_t)(void *, size_t, size_t); +typedef void (*pthreadpool_task_2d_with_thread_t)(void *, size_t, size_t, + size_t); +typedef void (*pthreadpool_task_2d_tile_1d_t)(void *, size_t, size_t, size_t); +typedef void (*pthreadpool_task_2d_tile_2d_t)(void *, size_t, size_t, size_t, + size_t); +typedef void (*pthreadpool_task_3d_t)(void *, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_1d_t)(void *, size_t, size_t, size_t, + size_t); +typedef void (*pthreadpool_task_3d_tile_1d_with_thread_t)(void *, size_t, + size_t, size_t, + size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_2d_t)(void *, size_t, size_t, size_t, + size_t, size_t); +typedef void (*pthreadpool_task_4d_t)(void *, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_4d_tile_1d_t)(void *, size_t, size_t, size_t, + size_t, size_t); +typedef void (*pthreadpool_task_4d_tile_2d_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t); +typedef void (*pthreadpool_task_5d_t)(void *, size_t, 
size_t, size_t, size_t, + size_t); +typedef void (*pthreadpool_task_5d_tile_1d_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t); +typedef void (*pthreadpool_task_5d_tile_2d_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_6d_t)(void *, size_t, size_t, size_t, size_t, + size_t, size_t); +typedef void (*pthreadpool_task_6d_tile_1d_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_6d_tile_2d_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t, size_t, + size_t); + +typedef void (*pthreadpool_task_1d_with_id_t)(void *, uint32_t, size_t); +typedef void (*pthreadpool_task_2d_tile_1d_with_id_t)(void *, uint32_t, size_t, + size_t, size_t); +typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void *, uint32_t, size_t, + size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_1d_with_id_t)(void *, uint32_t, size_t, + size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void *, uint32_t, size_t, + size_t, size_t, size_t, + size_t); +typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void *, uint32_t, size_t, + size_t, size_t, size_t, + size_t, size_t); + +typedef void (*pthreadpool_task_2d_tile_1d_with_id_with_thread_t)( + void *, uint32_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_1d_with_id_with_thread_t)( + void *, uint32_t, size_t, size_t, size_t, size_t, size_t); + +/** + * Disable support for denormalized numbers to the maximum extent possible for + * the duration of the computation. + * + * Handling denormalized floating-point numbers is often implemented in + * microcode, and incurs significant performance degradation. 
This hint + * instructs the thread pool to disable support for denormalized numbers before + * running the computation by manipulating architecture-specific control + * registers, and restore the initial value of control registers after the + * computation is complete. The thread pool temporary disables denormalized + * numbers on all threads involved in the computation (i.e. the caller threads, + * and potentially worker threads). + * + * Disabling denormalized numbers may have a small negative effect on results' + * accuracy. As various architectures differ in capabilities to control + * processing of denormalized numbers, using this flag may also hurt results' + * reproducibility across different instruction set architectures. + */ +#define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001 + +/** + * Yield worker threads to the system scheduler after the operation is finished. + * + * Force workers to use kernel wait (instead of active spin-wait by default) for + * new commands after this command is processed. This flag affects only the + * immediate next operation on this thread pool. To make the thread pool always + * use kernel wait, pass this flag to all parallelization functions. + */ +#define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002 + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Create a thread pool with the specified number of threads. + * + * @param threads_count the number of threads in the thread pool. + * A value of 0 has special interpretation: it creates a thread pool with as + * many threads as there are logical processors in the system. + * + * @returns A pointer to an opaque thread pool object if the call is + * successful, or NULL pointer if the call failed. + */ +pthreadpool_t pthreadpool_create(size_t threads_count); + +/** + * Query the number of threads in a thread pool. + * + * @param threadpool the thread pool to query. + * + * @returns The number of threads in the thread pool. 
+ */ +size_t pthreadpool_get_threads_count(pthreadpool_t threadpool); + +/** + * Process items on a 1D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i++) + * function(context, i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range the number of items on the 1D grid to process. The + * specified function will be called once for each item. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d(pthreadpool_t threadpool, + pthreadpool_task_1d_t function, void *context, + size_t range, uint32_t flags); + +/** + * Process items on a 1D grid passing along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i++) + * function(context, thread_index, i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range the number of items on the 1D grid to process. 
The + * specified function will be called once for each item. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d_with_thread( + pthreadpool_t threadpool, pthreadpool_task_1d_with_thread_t function, + void *context, size_t range, uint32_t flags); + +/** + * Process items on a 1D grid using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range; i++) + * function(context, uarch_index, i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range the number of items on the 1D grid to process. + * The specified function will be called once for each item. 
+ * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_1d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range, uint32_t flags); + +/** + * Process items on a 1D grid with specified maximum tile size. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i += tile) + * function(context, i, min(range - i, tile)); + * + * When the call returns, all items have been processed and the thread pool is + * ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, + * the calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range the number of items on the 1D grid to process. + * @param tile the maximum number of items on the 1D grid to process in + * one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_1d_tile_1d_t function, + void *context, size_t range, + size_t tile, uint32_t flags); + +/** + * Process items on a 2D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * function(context, i, j); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. 
+ * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d(pthreadpool_t threadpool, + pthreadpool_task_2d_t function, void *context, + size_t range_i, size_t range_j, uint32_t flags); + +/** + * Process items on a 2D grid passing along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * function(context, thread_index, i, j); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_with_thread( + pthreadpool_t threadpool, pthreadpool_task_2d_with_thread_t function, + void *context, size_t range_i, size_t range_j, uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_2d_tile_1d_t function, + void *context, size_t range_i, + size_t range_j, size_t tile_j, + uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function. 
+ * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, uarch_index, i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_1d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_2d_tile_1d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range_i, size_t range_j, size_t tile_j, uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function and passing + * along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, uarch_index, thread_index, i, j, min(range_j - j, + * tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. 
If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_1d_with_uarch_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_2d_tile_1d_with_id_with_thread_t function, void *context, + uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, + size_t range_j, size_t tile_j, uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along each + * grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i += tile_i) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, i, j, + * min(range_i - i, tile_i), min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. 
+ * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_i the maximum number of items along the first dimension of + * the 2D grid to process in one function call. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool, + pthreadpool_task_2d_tile_2d_t function, + void *context, size_t range_i, + size_t range_j, size_t tile_i, + size_t tile_j, uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along each + * grid dimension using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i += tile_i) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, uarch_index, i, j, + * min(range_i - i, tile_i), min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. 
+ * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, + * cpuinfo initialization failed, or index returned + * by cpuinfo_get_current_uarch_index() exceeds + * the max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected + * by the specified function. If the index returned + * by cpuinfo_get_current_uarch_index() exceeds this + * value, default_uarch_index will be used instead. + * default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 2D grid. + * @param range_j the number of items to process along the second + * dimension of the 2D grid. + * @param tile_i the maximum number of items along the first + * dimension of the 2D grid to process in one function call. + * @param tile_j the maximum number of items along the second + * dimension of the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_2d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_2d_tile_2d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range_i, size_t range_j, size_t tile_i, size_t tile_j, + uint32_t flags); + +/** + * Process items on a 3D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * function(context, i, j, k); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. 
If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d(pthreadpool_t threadpool, + pthreadpool_task_3d_t function, void *context, + size_t range_i, size_t range_j, size_t range_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. 
+ * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_3d_tile_1d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t tile_k, uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension and passing along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, thread_index, i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_1d_with_thread_t function, void *context, + size_t range_i, size_t range_j, size_t range_k, size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, uarch_index, i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. 
If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 3D grid. + * @param range_j the number of items to process along the second + * dimension of the 3D grid. + * @param range_k the number of items to process along the third + * dimension of the 3D grid. + * @param tile_k the maximum number of items along the third + * dimension of the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_3d_tile_1d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range_i, size_t range_j, size_t range_k, size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function and passing + * along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, uarch_index, thread_index, i, j, k, min(range_k - + * k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. 
+ * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 3D grid. + * @param range_j the number of items to process along the second + * dimension of the 3D grid. + * @param range_k the number of items to process along the third + * dimension of the 3D grid. + * @param tile_k the maximum number of items along the third + * dimension of the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d_with_uarch_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_1d_with_id_with_thread_t function, void *context, + uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, + size_t range_j, size_t range_k, size_t tile_k, uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 3D grid to process in one function call. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool, + pthreadpool_task_3d_tile_2d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t tile_j, size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions using a microarchitecture-aware task function. 
+ * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, uarch_index, i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 3D grid. + * @param range_j the number of items to process along the second + * dimension of the 3D grid. + * @param range_k the number of items to process along the third + * dimension of the 3D grid. + * @param tile_j the maximum number of items along the second + * dimension of the 3D grid to process in one function call. 
+ * @param tile_k the maximum number of items along the third + * dimension of the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_2d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_3d_tile_2d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range_i, size_t range_j, size_t range_k, size_t tile_j, + size_t tile_k, uint32_t flags); + +/** + * Process items on a 4D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * function(context, i, j, k, l); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d(pthreadpool_t threadpool, + pthreadpool_task_4d_t function, void *context, + size_t range_i, size_t range_j, size_t range_k, + size_t range_l, uint32_t flags); + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, i, j, k, l, min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_4d_tile_1d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t tile_l, + uint32_t flags); + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 4D grid to process in one function call. + * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool, + pthreadpool_task_4d_tile_2d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t tile_k, + size_t tile_l, uint32_t flags); + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, uarch_index, i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. 
If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 4D grid. + * @param range_j the number of items to process along the second + * dimension of the 4D grid. + * @param range_k the number of items to process along the third + * dimension of the 4D grid. + * @param range_l the number of items to process along the fourth + * dimension of the 4D grid. + * @param tile_k the maximum number of items along the third + * dimension of the 4D grid to process in one function call. + * @param tile_l the maximum number of items along the fourth + * dimension of the 4D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d_tile_2d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_4d_tile_2d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range_i, size_t range_j, size_t range_k, size_t range_l, + size_t tile_k, size_t tile_l, uint32_t flags); + +/** + * Process items on a 5D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * function(context, i, j, k, l, m); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. 
If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_5d(pthreadpool_t threadpool, + pthreadpool_task_5d_t function, void *context, + size_t range_i, size_t range_j, size_t range_k, + size_t range_l, size_t range_m, uint32_t flags); + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * function(context, i, j, k, l, m, min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. 
+ * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_5d_tile_1d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t tile_m, uint32_t flags); + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * for (size_t m = 0; m < range_m; m += tile_m) + * function(context, i, j, k, l, m, + * min(range_l - l, tile_l), min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. 
+ * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 5D grid to process in one function call. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool, + pthreadpool_task_5d_tile_2d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t tile_l, size_t tile_m, + uint32_t flags); + +/** + * Process items on a 6D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * for (size_t n = 0; n < range_n; n++) + * function(context, i, j, k, l, m, n); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. 
If threadpool
+ * is NULL, all items are processed serially on the calling thread.
+ * @param function the function to call for each tile.
+ * @param context the first argument passed to the specified function.
+ * @param range_i the number of items to process along the first dimension
+ * of the 6D grid.
+ * @param range_j the number of items to process along the second dimension
+ * of the 6D grid.
+ * @param range_k the number of items to process along the third dimension
+ * of the 6D grid.
+ * @param range_l the number of items to process along the fourth dimension
+ * of the 6D grid.
+ * @param range_m the number of items to process along the fifth dimension
+ * of the 6D grid.
+ * @param range_n the number of items to process along the sixth dimension
+ * of the 6D grid.
+ * @param flags a bitwise combination of zero or more optional flags
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+void pthreadpool_parallelize_6d(pthreadpool_t threadpool,
+                                pthreadpool_task_6d_t function, void *context,
+                                size_t range_i, size_t range_j, size_t range_k,
+                                size_t range_l, size_t range_m, size_t range_n,
+                                uint32_t flags);
+
+/**
+ * Process items on a 6D grid with the specified maximum tile size along the
+ * last grid dimension.
+ *
+ * The function implements a parallel version of the following snippet:
+ *
+ *   for (size_t i = 0; i < range_i; i++)
+ *     for (size_t j = 0; j < range_j; j++)
+ *       for (size_t k = 0; k < range_k; k++)
+ *         for (size_t l = 0; l < range_l; l++)
+ *           for (size_t m = 0; m < range_m; m++)
+ *             for (size_t n = 0; n < range_n; n += tile_n)
+ *               function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
+ *
+ * When the function returns, all items have been processed and the thread pool
+ * is ready for a new task.
+ * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_6d_tile_1d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t range_n, size_t tile_n, + uint32_t flags); + +/** + * Process items on a 6D grid with the specified maximum tile size along the + * last two grid dimensions. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * for (size_t n = 0; n < range_n; n += tile_n) + * function(context, i, j, k, l, m, n, + * min(range_m - m, tile_m), min(range_n - n, tile_n)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 6D grid to process in one function call. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_6d_tile_2d(pthreadpool_t threadpool, + pthreadpool_task_6d_tile_2d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t range_n, size_t tile_m, + size_t tile_n, uint32_t flags); + +/** + * Terminates threads in the thread pool and releases associated resources. + * + * @warning Accessing the thread pool after a call to this function constitutes + * undefined behaviour and may cause data corruption. + * + * @param[in,out] threadpool The thread pool to destroy. + */ +void pthreadpool_destroy(pthreadpool_t threadpool); + +#ifndef PTHREADPOOL_NO_DEPRECATED_API + +/* Legacy API for compatibility with pre-existing users (e.g. NNPACK) */ +#if defined(__GNUC__) +#define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__)) +#else +#define PTHREADPOOL_DEPRECATED +#endif + +typedef void (*pthreadpool_function_1d_t)(void *, size_t); +typedef void (*pthreadpool_function_1d_tiled_t)(void *, size_t, size_t); +typedef void (*pthreadpool_function_2d_t)(void *, size_t, size_t); +typedef void (*pthreadpool_function_2d_tiled_t)(void *, size_t, size_t, size_t, + size_t); +typedef void (*pthreadpool_function_3d_tiled_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t); +typedef void (*pthreadpool_function_4d_tiled_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t, size_t, + size_t); + +void pthreadpool_compute_1d(pthreadpool_t threadpool, + pthreadpool_function_1d_t function, void *argument, + size_t range) PTHREADPOOL_DEPRECATED; + +void pthreadpool_compute_1d_tiled(pthreadpool_t threadpool, + pthreadpool_function_1d_tiled_t function, + void *argument, size_t range, + size_t tile) PTHREADPOOL_DEPRECATED; + +void pthreadpool_compute_2d(pthreadpool_t threadpool, + pthreadpool_function_2d_t function, void *argument, + 
size_t range_i, + size_t range_j) PTHREADPOOL_DEPRECATED; + +void pthreadpool_compute_2d_tiled(pthreadpool_t threadpool, + pthreadpool_function_2d_tiled_t function, + void *argument, size_t range_i, + size_t range_j, size_t tile_i, + size_t tile_j) PTHREADPOOL_DEPRECATED; + +void pthreadpool_compute_3d_tiled(pthreadpool_t threadpool, + pthreadpool_function_3d_tiled_t function, + void *argument, size_t range_i, + size_t range_j, size_t range_k, size_t tile_i, + size_t tile_j, + size_t tile_k) PTHREADPOOL_DEPRECATED; + +void pthreadpool_compute_4d_tiled(pthreadpool_t threadpool, + pthreadpool_function_4d_tiled_t function, + void *argument, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t tile_i, size_t tile_j, + size_t tile_k, + size_t tile_l) PTHREADPOOL_DEPRECATED; + +#endif /* PTHREADPOOL_NO_DEPRECATED_API */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#ifdef __cplusplus + +namespace libpthreadpool { +namespace detail { +namespace { + +template void call_wrapper_1d(void *arg, size_t i) { + (*static_cast(arg))(i); +} + +template +void call_wrapper_1d_tile_1d(void *arg, size_t range_i, size_t tile_i) { + (*static_cast(arg))(range_i, tile_i); +} + +template void call_wrapper_2d(void *functor, size_t i, size_t j) { + (*static_cast(functor))(i, j); +} + +template +void call_wrapper_2d_tile_1d(void *functor, size_t i, size_t range_j, + size_t tile_j) { + (*static_cast(functor))(i, range_j, tile_j); +} + +template +void call_wrapper_2d_tile_2d(void *functor, size_t range_i, size_t range_j, + size_t tile_i, size_t tile_j) { + (*static_cast(functor))(range_i, range_j, tile_i, tile_j); +} + +template +void call_wrapper_3d(void *functor, size_t i, size_t j, size_t k) { + (*static_cast(functor))(i, j, k); +} + +template +void call_wrapper_3d_tile_1d(void *functor, size_t i, size_t j, size_t range_k, + size_t tile_k) { + (*static_cast(functor))(i, j, range_k, tile_k); +} + +template +void call_wrapper_3d_tile_2d(void *functor, size_t i, 
size_t range_j, + size_t range_k, size_t tile_j, size_t tile_k) { + (*static_cast(functor))(i, range_j, range_k, tile_j, tile_k); +} + +template +void call_wrapper_4d(void *functor, size_t i, size_t j, size_t k, size_t l) { + (*static_cast(functor))(i, j, k, l); +} + +template +void call_wrapper_4d_tile_1d(void *functor, size_t i, size_t j, size_t k, + size_t range_l, size_t tile_l) { + (*static_cast(functor))(i, j, k, range_l, tile_l); +} + +template +void call_wrapper_4d_tile_2d(void *functor, size_t i, size_t j, size_t range_k, + size_t range_l, size_t tile_k, size_t tile_l) { + (*static_cast(functor))(i, j, range_k, range_l, tile_k, tile_l); +} + +template +void call_wrapper_5d(void *functor, size_t i, size_t j, size_t k, size_t l, + size_t m) { + (*static_cast(functor))(i, j, k, l, m); +} + +template +void call_wrapper_5d_tile_1d(void *functor, size_t i, size_t j, size_t k, + size_t l, size_t range_m, size_t tile_m) { + (*static_cast(functor))(i, j, k, l, range_m, tile_m); +} + +template +void call_wrapper_5d_tile_2d(void *functor, size_t i, size_t j, size_t k, + size_t range_l, size_t range_m, size_t tile_l, + size_t tile_m) { + (*static_cast(functor))(i, j, k, range_l, range_m, tile_l, tile_m); +} + +template +void call_wrapper_6d(void *functor, size_t i, size_t j, size_t k, size_t l, + size_t m, size_t n) { + (*static_cast(functor))(i, j, k, l, m, n); +} + +template +void call_wrapper_6d_tile_1d(void *functor, size_t i, size_t j, size_t k, + size_t l, size_t m, size_t range_n, + size_t tile_n) { + (*static_cast(functor))(i, j, k, l, m, range_n, tile_n); +} + +template +void call_wrapper_6d_tile_2d(void *functor, size_t i, size_t j, size_t k, + size_t l, size_t range_m, size_t range_n, + size_t tile_m, size_t tile_n) { + (*static_cast(functor))(i, j, k, l, range_m, range_n, tile_m, + tile_n); +} + +} /* namespace */ +} /* namespace detail */ +} /* namespace libpthreadpool */ + +/** + * Process items on a 1D grid. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i++) + * functor(i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each item. + * @param range the number of items on the 1D grid to process. The + * specified functor will be called once for each item. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_1d(pthreadpool_t threadpool, + const T &functor, size_t range, + uint32_t flags = 0) { + pthreadpool_parallelize_1d( + threadpool, &libpthreadpool::detail::call_wrapper_1d, + const_cast(static_cast(&functor)), range, flags); +} + +/** + * Process items on a 1D grid with specified maximum tile size. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i += tile) + * functor(i, min(range - i, tile)); + * + * When the call returns, all items have been processed and the thread pool is + * ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, + * the calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range the number of items on the 1D grid to process. + * @param tile the maximum number of items on the 1D grid to process in + * one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range, + size_t tile, + uint32_t flags = 0) { + pthreadpool_parallelize_1d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_1d_tile_1d, + const_cast(static_cast(&functor)), range, tile, + flags); +} + +/** + * Process items on a 2D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * functor(i, j); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each item. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_2d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, uint32_t flags = 0) { + pthreadpool_parallelize_2d( + threadpool, &libpthreadpool::detail::call_wrapper_2d, + const_cast(static_cast(&functor)), range_i, range_j, + flags); +} + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * functor(i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t tile_j, + uint32_t flags = 0) { + pthreadpool_parallelize_2d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_1d, + const_cast(static_cast(&functor)), range_i, range_j, + tile_j, flags); +} + +/** + * Process items on a 2D grid with the specified maximum tile size along each + * grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i += tile_i) + * for (size_t j = 0; j < range_j; j += tile_j) + * functor(i, j, + * min(range_i - i, tile_i), min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. 
+ * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the first dimension of + * the 2D grid to process in one functor call. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t tile_i, + size_t tile_j, + uint32_t flags = 0) { + pthreadpool_parallelize_2d_tile_2d( + threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_2d, + const_cast(static_cast(&functor)), range_i, range_j, + tile_i, tile_j, flags); +} + +/** + * Process items on a 3D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * functor(i, j, k); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. 
+ * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_3d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + uint32_t flags = 0) { + pthreadpool_parallelize_3d( + threadpool, &libpthreadpool::detail::call_wrapper_3d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, flags); +} + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * functor(i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t tile_k, + uint32_t flags = 0) { + pthreadpool_parallelize_3d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_1d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, tile_k, flags); +} + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * functor(i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 3D grid to process in one functor call. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t tile_j, size_t tile_k, + uint32_t flags = 0) { + pthreadpool_parallelize_3d_tile_2d( + threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_2d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, tile_j, tile_k, flags); +} + +/** + * Process items on a 4D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * functor(i, j, k, l); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_4d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, uint32_t flags = 0) { + pthreadpool_parallelize_4d( + threadpool, &libpthreadpool::detail::call_wrapper_4d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, flags); +} + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * functor(i, j, k, l, min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t tile_l, + uint32_t flags = 0) { + pthreadpool_parallelize_4d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_1d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, tile_l, flags); +} + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * functor(i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 4D grid to process in one functor call. 
+ * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t tile_k, + size_t tile_l, + uint32_t flags = 0) { + pthreadpool_parallelize_4d_tile_2d( + threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_2d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, tile_k, tile_l, flags); +} + +/** + * Process items on a 5D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * functor(i, j, k, l, m); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void +pthreadpool_parallelize_5d(pthreadpool_t threadpool, const T &functor, + size_t range_i, size_t range_j, size_t range_k, + size_t range_l, size_t range_m, uint32_t flags = 0) { + pthreadpool_parallelize_5d( + threadpool, &libpthreadpool::detail::call_wrapper_5d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, flags); +} + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * functor(i, j, k, l, m, min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. 
+ * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t tile_m, + uint32_t flags = 0) { + pthreadpool_parallelize_5d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_1d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, tile_m, flags); +} + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * for (size_t m = 0; m < range_m; m += tile_m) + * functor(i, j, k, l, m, + * min(range_l - l, tile_l), min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. 
+ * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 5D grid to process in one functor call. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t tile_l, size_t tile_m, + uint32_t flags = 0) { + pthreadpool_parallelize_5d_tile_2d( + threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_2d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, tile_l, tile_m, flags); +} + +/** + * Process items on a 6D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * for (size_t n = 0; n < range_n; n++) + * functor(i, j, k, l, m, n); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. 
+ * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_6d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t range_n, uint32_t flags = 0) { + pthreadpool_parallelize_6d( + threadpool, &libpthreadpool::detail::call_wrapper_6d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, range_n, flags); +} + +/** + * Process items on a 6D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * for (size_t n = 0; n < range_n; n += tile_n) + * functor(i, j, k, l, m, n, min(range_n - n, tile_n)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. 
If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t range_n, size_t tile_n, + uint32_t flags = 0) { + pthreadpool_parallelize_6d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_1d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, range_n, tile_n, flags); +} + +/** + * Process items on a 6D grid with the specified maximum tile size along the + * last two grid dimensions. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * for (size_t n = 0; n < range_n; n += tile_n) + * functor(i, j, k, l, m, n, + * min(range_m - m, tile_m), min(range_n - n, tile_n)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 6D grid to process in one functor call. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_6d_tile_2d( + pthreadpool_t threadpool, const T &functor, size_t range_i, size_t range_j, + size_t range_k, size_t range_l, size_t range_m, size_t range_n, + size_t tile_m, size_t tile_n, uint32_t flags = 0) { + pthreadpool_parallelize_6d_tile_2d( + threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_2d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, range_n, tile_m, tile_n, flags); +} + +#endif /* __cplusplus */ + +#endif /* PTHREADPOOL_H_ */ diff --git a/packages/react-native-executorch/third-party/include/tokenizers-cpp/tokenizers_c.h b/packages/react-native-executorch/third-party/include/tokenizers-cpp/tokenizers_c.h deleted file mode 100644 index 42a59e94e5..0000000000 --- a/packages/react-native-executorch/third-party/include/tokenizers-cpp/tokenizers_c.h +++ /dev/null @@ -1,61 +0,0 @@ -/*! 
- * Copyright (c) 2023 by Contributors - * \file tokenizers_c.h - * \brief C binding to tokenizers rust library - */ -#ifndef TOKENIZERS_C_H_ -#define TOKENIZERS_C_H_ - -// The C API -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -typedef void *TokenizerHandle; - -typedef struct { - int *token_ids; - size_t len; -} TokenizerEncodeResult; - -TokenizerHandle tokenizers_new_from_str(const char *json, size_t len); - -TokenizerHandle byte_level_bpe_tokenizers_new_from_str( - const char *vocab, size_t vocab_len, const char *merges, size_t merges_len, - const char *added_tokens, size_t added_tokens_len); - -void tokenizers_encode(TokenizerHandle handle, const char *data, size_t len, - int add_special_token, TokenizerEncodeResult *result); - -void tokenizers_encode_batch(TokenizerHandle handle, const char **data, - size_t *len, size_t num_seqs, - int add_special_token, - TokenizerEncodeResult *results); - -void tokenizers_free_encode_results(TokenizerEncodeResult *results, - size_t num_seqs); - -void tokenizers_decode(TokenizerHandle handle, const uint32_t *data, size_t len, - int skip_special_token); - -void tokenizers_get_decode_str(TokenizerHandle handle, const char **data, - size_t *len); - -void tokenizers_get_vocab_size(TokenizerHandle handle, size_t *size); - -void tokenizers_id_to_token(TokenizerHandle handle, uint32_t id, - const char **data, size_t *len); - -// tokenizers_token_to_id stores -1 to *id if the token is not in the vocab -void tokenizers_token_to_id(TokenizerHandle handle, const char *token, - size_t len, int32_t *id); - -void tokenizers_free(TokenizerHandle handle); - -#ifdef __cplusplus -} -#endif -#endif // TOKENIZERS_C_H_ diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj index bfa259f667..eedabc07e0 100644 --- 
a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj +++ b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj @@ -21,31 +21,7 @@ 5576B4B72CEF9709005027B7 /* ETModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 5576B4B62CEF9705005027B7 /* ETModel.h */; settings = {ATTRIBUTES = (Public, ); }; }; 5576B4B92CEF970E005027B7 /* ETModel.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5576B4B82CEF970C005027B7 /* ETModel.mm */; }; 558699BB2D8AD562004180E5 /* re2.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5576B49E2CEF76CC005027B7 /* re2.xcframework */; }; - 55A815F22D9D40680075A106 /* irunner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55A815F12D9D40680075A106 /* irunner.h */; }; 55DEEA382D05ABBB004422A3 /* InputType.h in Headers */ = {isa = PBXBuildFile; fileRef = 55DEEA372D05ABB4004422A3 /* InputType.h */; }; - 55DEEBF02D8C45960033DBBA /* HuggingFaceTokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 55DEEBEF2D8C458F0033DBBA /* HuggingFaceTokenizer.h */; settings = {ATTRIBUTES = (Public, ); }; }; - 55DEEBF22D8C459A0033DBBA /* HuggingFaceTokenizer.mm in Sources */ = {isa = PBXBuildFile; fileRef = 55DEEBF12D8C45990033DBBA /* HuggingFaceTokenizer.mm */; }; - 55EA2C3F2CB90C7A004315B3 /* runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C262CB90C7A004315B3 /* runner.cpp */; }; - 55EA2C402CB90C7A004315B3 /* runner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C272CB90C7A004315B3 /* runner.h */; }; - 55EA2C412CB90C7A004315B3 /* stats.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C282CB90C7A004315B3 /* stats.h */; }; - 55EA2C422CB90C7A004315B3 /* text_decoder_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C292CB90C7A004315B3 /* text_decoder_runner.cpp */; }; - 55EA2C432CB90C7A004315B3 /* text_decoder_runner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2A2CB90C7A004315B3 /* 
text_decoder_runner.h */; }; - 55EA2C442CB90C7A004315B3 /* text_prefiller.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C2B2CB90C7A004315B3 /* text_prefiller.cpp */; }; - 55EA2C452CB90C7A004315B3 /* text_prefiller.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2C2CB90C7A004315B3 /* text_prefiller.h */; }; - 55EA2C462CB90C7A004315B3 /* text_token_generator.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2D2CB90C7A004315B3 /* text_token_generator.h */; }; - 55EA2C472CB90C7A004315B3 /* util.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2E2CB90C7A004315B3 /* util.h */; }; - 55EA2C482CB90C7A004315B3 /* sampler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C302CB90C7A004315B3 /* sampler.cpp */; }; - 55EA2C492CB90C7A004315B3 /* sampler.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C312CB90C7A004315B3 /* sampler.h */; }; - 55EA2C4B2CB90C7A004315B3 /* LLaMARunner.mm in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C342CB90C7A004315B3 /* LLaMARunner.mm */; }; - 55EA2C4C2CB90C7A004315B3 /* base64.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C362CB90C7A004315B3 /* base64.h */; }; - 55EA2C4D2CB90C7A004315B3 /* bpe_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C372CB90C7A004315B3 /* bpe_tokenizer.cpp */; }; - 55EA2C4E2CB90C7A004315B3 /* bpe_tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C382CB90C7A004315B3 /* bpe_tokenizer.h */; }; - 55EA2C4F2CB90C7A004315B3 /* llama_tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C392CB90C7A004315B3 /* llama_tiktoken.cpp */; }; - 55EA2C502CB90C7A004315B3 /* llama_tiktoken.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C3A2CB90C7A004315B3 /* llama_tiktoken.h */; }; - 55EA2C512CB90C7A004315B3 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C3B2CB90C7A004315B3 /* tiktoken.cpp */; }; - 55EA2C522CB90C7A004315B3 /* tiktoken.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C3C2CB90C7A004315B3 /* 
tiktoken.h */; }; - 55EA2C532CB90C7A004315B3 /* tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C3D2CB90C7A004315B3 /* tokenizer.h */; }; - 55EA2C542CB90E70004315B3 /* LLaMARunner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C332CB90C7A004315B3 /* LLaMARunner.h */; settings = {ATTRIBUTES = (Public, ); }; }; 55EA2C572CB90E7D004315B3 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 55EA2C562CB90E7D004315B3 /* Accelerate.framework */; }; 55EA2C592CB90E80004315B3 /* CoreML.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 55EA2C582CB90E80004315B3 /* CoreML.framework */; }; 55EA2C5B2CB90E85004315B3 /* libsqlite3.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 55EA2C5A2CB90E85004315B3 /* libsqlite3.tbd */; }; @@ -67,32 +43,8 @@ 5576B49E2CEF76CC005027B7 /* re2.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = re2.xcframework; path = frameworks/re2.xcframework; sourceTree = ""; }; 5576B4B62CEF9705005027B7 /* ETModel.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ETModel.h; sourceTree = ""; }; 5576B4B82CEF970C005027B7 /* ETModel.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = ETModel.mm; sourceTree = ""; }; - 55A815F12D9D40680075A106 /* irunner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = irunner.h; sourceTree = ""; }; 55DEEA372D05ABB4004422A3 /* InputType.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = InputType.h; sourceTree = ""; }; - 55DEEBEF2D8C458F0033DBBA /* HuggingFaceTokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = HuggingFaceTokenizer.h; sourceTree = ""; }; - 55DEEBF12D8C45990033DBBA /* HuggingFaceTokenizer.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = HuggingFaceTokenizer.mm; sourceTree = ""; }; 55EA2C1C2CB90C22004315B3 /* ExecutorchLib.framework */ = {isa = 
PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = ExecutorchLib.framework; sourceTree = BUILT_PRODUCTS_DIR; }; - 55EA2C262CB90C7A004315B3 /* runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = runner.cpp; sourceTree = ""; }; - 55EA2C272CB90C7A004315B3 /* runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = runner.h; sourceTree = ""; }; - 55EA2C282CB90C7A004315B3 /* stats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stats.h; sourceTree = ""; }; - 55EA2C292CB90C7A004315B3 /* text_decoder_runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = text_decoder_runner.cpp; sourceTree = ""; }; - 55EA2C2A2CB90C7A004315B3 /* text_decoder_runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_decoder_runner.h; sourceTree = ""; }; - 55EA2C2B2CB90C7A004315B3 /* text_prefiller.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = text_prefiller.cpp; sourceTree = ""; }; - 55EA2C2C2CB90C7A004315B3 /* text_prefiller.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_prefiller.h; sourceTree = ""; }; - 55EA2C2D2CB90C7A004315B3 /* text_token_generator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_token_generator.h; sourceTree = ""; }; - 55EA2C2E2CB90C7A004315B3 /* util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = util.h; sourceTree = ""; }; - 55EA2C302CB90C7A004315B3 /* sampler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sampler.cpp; sourceTree = ""; }; - 55EA2C312CB90C7A004315B3 /* sampler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.c.h; path = sampler.h; sourceTree = ""; }; - 55EA2C332CB90C7A004315B3 /* LLaMARunner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LLaMARunner.h; sourceTree = ""; }; - 55EA2C342CB90C7A004315B3 /* LLaMARunner.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = LLaMARunner.mm; sourceTree = ""; }; - 55EA2C362CB90C7A004315B3 /* base64.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = base64.h; sourceTree = ""; }; - 55EA2C372CB90C7A004315B3 /* bpe_tokenizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bpe_tokenizer.cpp; sourceTree = ""; }; - 55EA2C382CB90C7A004315B3 /* bpe_tokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bpe_tokenizer.h; sourceTree = ""; }; - 55EA2C392CB90C7A004315B3 /* llama_tiktoken.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = llama_tiktoken.cpp; sourceTree = ""; }; - 55EA2C3A2CB90C7A004315B3 /* llama_tiktoken.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = llama_tiktoken.h; sourceTree = ""; }; - 55EA2C3B2CB90C7A004315B3 /* tiktoken.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tiktoken.cpp; sourceTree = ""; }; - 55EA2C3C2CB90C7A004315B3 /* tiktoken.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tiktoken.h; sourceTree = ""; }; - 55EA2C3D2CB90C7A004315B3 /* tokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tokenizer.h; sourceTree = ""; }; 55EA2C562CB90E7D004315B3 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = 
SDKROOT; }; 55EA2C582CB90E80004315B3 /* CoreML.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreML.framework; path = System/Library/Frameworks/CoreML.framework; sourceTree = SDKROOT; }; 55EA2C5A2CB90E85004315B3 /* libsqlite3.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libsqlite3.tbd; path = usr/lib/libsqlite3.tbd; sourceTree = SDKROOT; }; @@ -147,68 +99,20 @@ children = ( 55DEEA372D05ABB4004422A3 /* InputType.h */, 55EA2C352CB90C7A004315B3 /* Exported */, - 55EA2C2F2CB90C7A004315B3 /* runner */, - 55EA2C322CB90C7A004315B3 /* sampler */, - 55EA2C3E2CB90C7A004315B3 /* tokenizer */, A851C4042CF9F1B600424E93 /* Utils.hpp */, ); path = ExecutorchLib; sourceTree = ""; }; - 55EA2C2F2CB90C7A004315B3 /* runner */ = { - isa = PBXGroup; - children = ( - 55A815F12D9D40680075A106 /* irunner.h */, - 55EA2C262CB90C7A004315B3 /* runner.cpp */, - 55EA2C272CB90C7A004315B3 /* runner.h */, - 55EA2C282CB90C7A004315B3 /* stats.h */, - 55EA2C292CB90C7A004315B3 /* text_decoder_runner.cpp */, - 55EA2C2A2CB90C7A004315B3 /* text_decoder_runner.h */, - 55EA2C2B2CB90C7A004315B3 /* text_prefiller.cpp */, - 55EA2C2C2CB90C7A004315B3 /* text_prefiller.h */, - 55EA2C2D2CB90C7A004315B3 /* text_token_generator.h */, - 55EA2C2E2CB90C7A004315B3 /* util.h */, - ); - path = runner; - sourceTree = ""; - }; - 55EA2C322CB90C7A004315B3 /* sampler */ = { - isa = PBXGroup; - children = ( - 55EA2C302CB90C7A004315B3 /* sampler.cpp */, - 55EA2C312CB90C7A004315B3 /* sampler.h */, - ); - path = sampler; - sourceTree = ""; - }; 55EA2C352CB90C7A004315B3 /* Exported */ = { isa = PBXGroup; children = ( - 55DEEBF12D8C45990033DBBA /* HuggingFaceTokenizer.mm */, - 55DEEBEF2D8C458F0033DBBA /* HuggingFaceTokenizer.h */, 5576B4B82CEF970C005027B7 /* ETModel.mm */, 5576B4B62CEF9705005027B7 /* ETModel.h */, - 55EA2C332CB90C7A004315B3 /* LLaMARunner.h */, - 55EA2C342CB90C7A004315B3 /* LLaMARunner.mm */, ); path = Exported; 
sourceTree = ""; }; - 55EA2C3E2CB90C7A004315B3 /* tokenizer */ = { - isa = PBXGroup; - children = ( - 55EA2C362CB90C7A004315B3 /* base64.h */, - 55EA2C372CB90C7A004315B3 /* bpe_tokenizer.cpp */, - 55EA2C382CB90C7A004315B3 /* bpe_tokenizer.h */, - 55EA2C392CB90C7A004315B3 /* llama_tiktoken.cpp */, - 55EA2C3A2CB90C7A004315B3 /* llama_tiktoken.h */, - 55EA2C3B2CB90C7A004315B3 /* tiktoken.cpp */, - 55EA2C3C2CB90C7A004315B3 /* tiktoken.h */, - 55EA2C3D2CB90C7A004315B3 /* tokenizer.h */, - ); - path = tokenizer; - sourceTree = ""; - }; 55EA2C552CB90E7D004315B3 /* Frameworks */ = { isa = PBXGroup; children = ( @@ -238,24 +142,9 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( - 55EA2C542CB90E70004315B3 /* LLaMARunner.h in Headers */, 5576B4B72CEF9709005027B7 /* ETModel.h in Headers */, - 55DEEBF02D8C45960033DBBA /* HuggingFaceTokenizer.h in Headers */, - 55EA2C532CB90C7A004315B3 /* tokenizer.h in Headers */, 55DEEA382D05ABBB004422A3 /* InputType.h in Headers */, - 55EA2C412CB90C7A004315B3 /* stats.h in Headers */, - 55EA2C4E2CB90C7A004315B3 /* bpe_tokenizer.h in Headers */, - 55EA2C402CB90C7A004315B3 /* runner.h in Headers */, - 55A815F22D9D40680075A106 /* irunner.h in Headers */, - 55EA2C432CB90C7A004315B3 /* text_decoder_runner.h in Headers */, - 55EA2C492CB90C7A004315B3 /* sampler.h in Headers */, - 55EA2C4C2CB90C7A004315B3 /* base64.h in Headers */, - 55EA2C452CB90C7A004315B3 /* text_prefiller.h in Headers */, - 55EA2C522CB90C7A004315B3 /* tiktoken.h in Headers */, - 55EA2C502CB90C7A004315B3 /* llama_tiktoken.h in Headers */, - 55EA2C472CB90C7A004315B3 /* util.h in Headers */, A851C4072CF9F1B600424E93 /* Utils.hpp in Headers */, - 55EA2C462CB90C7A004315B3 /* text_token_generator.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -266,7 +155,6 @@ isa = PBXNativeTarget; buildConfigurationList = 55EA2C232CB90C22004315B3 /* Build configuration list for PBXNativeTarget "ExecutorchLib" */; buildPhases = ( - 558699CC2D8B1D30004180E5 /* 
Build tokenizers cpp */, 55EA2C172CB90C22004315B3 /* Headers */, 55EA2C182CB90C22004315B3 /* Sources */, 55EA2C192CB90C22004315B3 /* Frameworks */, @@ -327,43 +215,12 @@ }; /* End PBXResourcesBuildPhase section */ -/* Begin PBXShellScriptBuildPhase section */ - 558699CC2D8B1D30004180E5 /* Build tokenizers cpp */ = { - isa = PBXShellScriptBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - inputFileListPaths = ( - ); - inputPaths = ( - ); - name = "Build tokenizers cpp"; - outputFileListPaths = ( - ); - outputPaths = ( - "$(DERIVED_FILE_DIR)/newOutputFile", - ); - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. Install it to Applications/ folder and run `sudo /Applications/CMake.app/Contents/bin/cmake-gui --install` to install CMake commandline tools.\"\n exit 1\nfi\n\n# Type a script or drag a script file from your workspace to insert its path.\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == \"iphoneos\" ]]; then\n TOKENIZERS_TARGET=\"aarch64-apple-ios\"\nelif [[ \"$PLATFORM_NAME\" == \"iphonesimulator\" ]]; then\n TOKENIZERS_TARGET=\"x86_64-apple-ios\"\nfi\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"10.15\"\nfi\n\n\ncmake_build_tokenizers() {\n export PATH=\"$PATH:$HOME/.cargo/bin\"\n export PATH=\"/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib:$PATH\"\n export LIBRARY_PATH=\"$LIBRARY_PATH:/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib\"\n local src_dir=$1\n local target=$2\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n mkdir -p \"$build_dir\" && cd 
\"$build_dir\"\n \n if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n fi\n \n cmake \"$src_dir\" -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../ios/ios.toolchain.cmake\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n -DENABLE_BITCODE=FALSE \\\n -DENABLE_ARC=TRUE \\\n -DENABLE_VISIBILITY=TRUE \\\n -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n -DMLC_ENABLE_SENTENCEPIECE_TOKENIZER=ON\n \n cmake --build . --config \"Release\" --target \"install\"\n}\n\ncmake_build_tokenizers \"$SRCROOT/../../../../../third-party/tokenizers-cpp\" \"install\"\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n\n"; - }; -/* End PBXShellScriptBuildPhase section */ - /* Begin PBXSourcesBuildPhase section */ 55EA2C182CB90C22004315B3 /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 55EA2C482CB90C7A004315B3 /* sampler.cpp in Sources */, - 55EA2C3F2CB90C7A004315B3 /* runner.cpp in Sources */, - 55EA2C422CB90C7A004315B3 /* text_decoder_runner.cpp in Sources */, - 55EA2C4D2CB90C7A004315B3 /* bpe_tokenizer.cpp in Sources */, - 55EA2C4F2CB90C7A004315B3 /* llama_tiktoken.cpp in Sources */, 5576B4B92CEF970E005027B7 /* ETModel.mm in Sources */, - 55EA2C442CB90C7A004315B3 /* text_prefiller.cpp in Sources */, - 55EA2C512CB90C7A004315B3 /* tiktoken.cpp in Sources */, - 55DEEBF22D8C459A0033DBBA /* HuggingFaceTokenizer.mm in Sources */, - 55EA2C4B2CB90C7A004315B3 /* LLaMARunner.mm in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -542,7 +399,6 @@ "$(BUILT_PRODUCTS_DIR)/libbackend_mps_ios.a", "-force_load", "$(BUILT_PRODUCTS_DIR)/libexecutorch_ios.a", - "@$(TEMP_DIR)/cmake/linker_flags", ); "OTHER_LDFLAGS[sdk=iphonesimulator*]" = ( "$(inherited)", @@ -560,7 +416,6 @@ "$(BUILT_PRODUCTS_DIR)/libbackend_mps_simulator.a", "-force_load", 
"$(BUILT_PRODUCTS_DIR)/libexecutorch_simulator.a", - "@$(TEMP_DIR)/cmake/linker_flags", ); PRODUCT_BUNDLE_IDENTIFIER = com.swmansion.Executorch; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; @@ -624,7 +479,6 @@ "$(BUILT_PRODUCTS_DIR)/libbackend_mps_ios.a", "-force_load", "$(BUILT_PRODUCTS_DIR)/libexecutorch_ios.a", - "@$(TEMP_DIR)/cmake/linker_flags", ); "OTHER_LDFLAGS[sdk=iphonesimulator*]" = ( "$(inherited)", @@ -642,7 +496,6 @@ "$(BUILT_PRODUCTS_DIR)/libbackend_mps_simulator.a", "-force_load", "$(BUILT_PRODUCTS_DIR)/libexecutorch_simulator.a", - "@$(TEMP_DIR)/cmake/linker_flags", ); PRODUCT_BUNDLE_IDENTIFIER = com.swmansion.Executorch; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h deleted file mode 100644 index 4332cf811d..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h +++ /dev/null @@ -1,14 +0,0 @@ -#import - -@interface HuggingFaceTokenizer : NSObject - -- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath; -- (NSArray *)encode:(NSString *)text; -- (NSString *)decode:(NSArray *)tokenIds; -- (NSString *)decode:(NSArray *)tokenIds - skipSpecialTokens:(BOOL)skipSpecialTokens; -- (NSUInteger)getVocabSize; -- (NSString *)idToToken:(NSInteger)tokenId; -- (NSInteger)tokenToId:(NSString *)token; - -@end diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm deleted file mode 100644 index 38d7cdfd09..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm +++ /dev/null @@ -1,80 +0,0 @@ -#import 
"HuggingFaceTokenizer.h" -#include -#include -#include -#include -#include - -std::string loadBytesFromFile(const std::string &path) { - std::ifstream fs(path, std::ios::in | std::ios::binary); - if (fs.fail()) { - throw std::runtime_error("Failed to open tokenizer file"); - } - std::string data; - fs.seekg(0, std::ios::end); - size_t size = static_cast(fs.tellg()); - fs.seekg(0, std::ios::beg); - data.resize(size); - fs.read(data.data(), size); - return data; -} - -@implementation HuggingFaceTokenizer { - std::unique_ptr _tokenizer; -} - -- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath { - self = [super init]; - if (self) { - auto blob = loadBytesFromFile([tokenizerPath UTF8String]); - _tokenizer = tokenizers::Tokenizer::FromBlobJSON(blob); - } - return self; -} - -- (NSArray *)encode:(NSString *)text { - std::vector result = _tokenizer->Encode([text UTF8String]); - NSMutableArray *encodedResult = - [[NSMutableArray alloc] initWithCapacity:result.size()]; - for (int32_t tokenId : result) { - [encodedResult addObject:@(tokenId)]; - } - - return encodedResult; -} - -- (NSString *)decode:(NSArray *)tokenIds { - return [self decode:tokenIds skipSpecialTokens:NO]; -} - -- (NSString *)decode:(NSArray *)tokenIds - skipSpecialTokens:(BOOL)skipSpecialTokens { - std::vector stdTokenIds; - stdTokenIds.reserve([tokenIds count]); - for (NSNumber *tokenId in tokenIds) { - stdTokenIds.push_back([tokenId intValue]); - } - std::string decodedString = - _tokenizer->Decode(stdTokenIds, skipSpecialTokens); - return [NSString stringWithUTF8String:decodedString.c_str()]; -} - -- (NSUInteger)getVocabSize { - return (NSUInteger)_tokenizer->GetVocabSize(); -} - -- (NSString *)idToToken:(NSInteger)tokenId { - std::string token = _tokenizer->IdToToken(static_cast(tokenId)); - return [NSString stringWithUTF8String:token.c_str()]; -} - -- (NSInteger)tokenToId:(NSString *)token { - std::string stdToken = [token UTF8String]; - return (NSInteger)_tokenizer->TokenToId(stdToken); 
-} - -- (void)dealloc { - _tokenizer.reset(); -} - -@end diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h deleted file mode 100644 index d8638cfa6a..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#import - -NS_ASSUME_NONNULL_BEGIN - -FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain; - -NS_SWIFT_NAME(Runner) -@interface LLaMARunner : NSObject - -- (instancetype)initWithModelPath:(NSString *)filePath - tokenizerPath:(NSString *)tokenizerPath; -- (BOOL)isLoaded; -- (BOOL)loadWithError:(NSError **)error; -- (BOOL)generate:(NSString *)prompt - withTokenCallback:(nullable void (^)(NSString *))callback - error:(NSError **)error; -- (void)stop; - -+ (instancetype)new NS_UNAVAILABLE; -- (instancetype)init NS_UNAVAILABLE; - -@end - -NS_ASSUME_NONNULL_END diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm deleted file mode 100644 index b50bfb7b01..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#import "LLaMARunner.h" - -#import "runner.h" -#import - -using namespace ::torch::executor; - -NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain"; - -@interface LLaMARunner () -@end - -@implementation LLaMARunner { - std::unique_ptr _runner; -} - -- (instancetype)initWithModelPath:(NSString *)modelPath - tokenizerPath:(NSString *)tokenizerPath { - self = [super init]; - if (self) { - [ExecuTorchLog.sharedLog addSink:self]; - _runner = std::make_unique(modelPath.UTF8String, - tokenizerPath.UTF8String); - } - return self; -} - -- (void)dealloc { - [ExecuTorchLog.sharedLog removeSink:self]; -} - -- (BOOL)isLoaded { - return _runner->is_loaded(); -} - -- (BOOL)loadWithError:(NSError **)error { - const auto status = _runner->load(); - if (status != Error::Ok) { - if (error) { - *error = [NSError errorWithDomain:LLaMARunnerErrorDomain - code:(NSInteger)status - userInfo:nil]; - } - return NO; - } - return YES; -} - -- (BOOL)generate:(NSString *)prompt - withTokenCallback:(nullable void (^)(NSString *))callback - error:(NSError **)error { - const auto status = _runner->generate( - prompt.UTF8String, - [callback](const std::string &token) { callback(@(token.c_str())); }); - if (status != Error::Ok) { - if (error) { - *error = [NSError errorWithDomain:LLaMARunnerErrorDomain - code:(NSInteger)status - userInfo:nil]; - return NO; - } - } - return YES; -} - -- (void)stop { - _runner->stop(); -} - -#pragma mark - ExecuTorchLogSink - -- (void)logWithLevel:(ExecuTorchLogLevel)level - timestamp:(NSTimeInterval)timestamp - filename:(NSString *)filename - line:(NSUInteger)line - message:(NSString *)message { - NSUInteger totalSeconds = (NSUInteger)timestamp; - NSUInteger hours = (totalSeconds / 3600) % 24; - NSUInteger minutes = (totalSeconds / 60) % 60; - NSUInteger seconds = totalSeconds % 60; - NSUInteger microseconds = (timestamp - totalSeconds) * 1000000; - NSLog(@"%c %02lu:%02lu:%02lu.%06lu executorch:%s:%zu] %s", (char)level, hours, - minutes, 
seconds, microseconds, filename.UTF8String, line, - message.UTF8String); -} - -@end diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h deleted file mode 100644 index 722fe3900e..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ -// @lint-ignore-every LICENSELINT -/************************************************************************** - Copyright (c) 2023 sewenew - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- *************************************************************************/ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace executorch { -namespace extension { -namespace llm { -using Error = executorch::runtime::Error; -template using Result = executorch::runtime::Result; - -namespace base64 { - -Result decode(const std::string_view &input); - -namespace detail { - -constexpr uint32_t DECODE_TABLE[] = { - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, - 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, - 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, - 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255}; - -inline Error validate(uint32_t v) { - ET_CHECK_OR_RETURN_ERROR(v != 255, InvalidArgument, "invalid char"); - return Error::Ok; -} - -inline Error decode(const std::string_view &input, std::string &output) { - ET_CHECK_OR_RETURN_ERROR(input.size() == 4, InvalidArgument, - "input length must be 4, got 
%zu", input.size()); - - uint32_t val = 0; - - uint8_t c = input[0]; - auto v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = v; - - c = input[1]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - c = input[2]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - c = input[3]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - output.push_back(static_cast((val >> 16) & 0xFF)); - output.push_back(static_cast((val >> 8) & 0xFF)); - output.push_back(static_cast(val & 0xFF)); - return Error::Ok; -} - -inline Error decode_1_padding(const std::string_view &input, - std::string &output) { - ET_CHECK_OR_RETURN_ERROR(input.size() == 3, InvalidArgument, - "input length must be 3, got %zu", input.size()); - - uint32_t val = 0; - - uint8_t c = input[0]; - auto v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = v; - - c = input[1]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - c = input[2]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - output.push_back(static_cast((val >> 10) & 0xFF)); - output.push_back(static_cast((val >> 2) & 0xFF)); - return Error::Ok; -} - -inline Error decode_2_padding(const std::string_view &input, - std::string &output) { - ET_CHECK_OR_RETURN_ERROR(input.size() == 2, InvalidArgument, - "input length must be 2, got %zu", input.size()); - - uint32_t val = 0; - - uint8_t c = input[0]; - auto v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = v; - - c = input[1]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - output.push_back(static_cast((val >> 4) & 0xFF)); - return Error::Ok; -} - -} // namespace detail - -inline Result decode(const std::string_view &input) { - ET_CHECK_OR_RETURN_ERROR(!input.empty(), InvalidArgument, 
"empty input"); - - // Faster than `input.size() % 4`. - ET_CHECK_OR_RETURN_ERROR( - (input.size() & 3) == 0 && input.size() >= 4, InvalidArgument, - "input length must be larger than 4 and is multiple of 4, got %zu", - input.size()); - - std::string output; - output.reserve(input.size() / 4 * 3); - auto idx = 0U; - for (; idx < input.size() - 4; idx += 4) { - ET_CHECK_OK_OR_RETURN_ERROR(detail::decode(input.substr(idx, 4), output)); - } - - // Last 4 bytes. Might contain paddings. - if (input[idx + 3] == '=') { - if (input[idx + 2] == '=') { - // Tow paddings. - ET_CHECK_OK_OR_RETURN_ERROR( - detail::decode_2_padding(input.substr(idx, 2), output)); - } else { - // One padding. - ET_CHECK_OK_OR_RETURN_ERROR( - detail::decode_1_padding(input.substr(idx, 3), output)); - } - } else { - // No padding. - ET_CHECK_OK_OR_RETURN_ERROR(detail::decode(input.substr(idx, 4), output)); - } - - return output; -} - -} // namespace base64 - -} // namespace llm -} // namespace extension -} // namespace executorch - -namespace torch { -namespace executor { -namespace base64 { -// TODO(T197294990): Remove these deprecated aliases once all users have moved -// to the new `::executorch` namespaces. -using ::executorch::extension::llm::base64::decode; -} // namespace base64 -} // namespace executor -} // namespace torch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp deleted file mode 100644 index aa0a6d1baa..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "bpe_tokenizer.h" - -#include - -using ::executorch::runtime::Error; -using ::executorch::runtime::Result; - -namespace executorch { -namespace extension { -namespace llm { - -static int compare_tokens(const void *a, const void *b) { - if (((TokenIndex *)a)->str == nullptr) { - return -1; - } - if (((TokenIndex *)b)->str == nullptr) { - return 1; - } - return strcmp(((TokenIndex *)a)->str, ((TokenIndex *)b)->str); -} - -BPETokenizer::BPETokenizer() : Tokenizer() { - for (int i = 0; i < 256; i++) { - byte_pieces_[i * 2] = (unsigned char)i; - byte_pieces_[i * 2 + 1] = '\0'; - } -} - -/** - * @brief Load the tokenizer from a file. The tokenizer file contains the - * vocabulary and scores. The format is: the first integer is the maximum - * token length, followed by a list of (word_len, word) pairs. Here we - * are reading all the vocabulary into memory and keep it sorted for fast - * lookup. - * - * @param tokenizer_path The path to the tokenizer file. - * @return Error - */ -Error BPETokenizer::load(const std::string &tokenizer_path) { - if (initialized_) { - ET_LOG(Info, "Tokenizer already initialized"); - return Error::Ok; - } - // read in the file - FILE *file = fopen(tokenizer_path.c_str(), "rb"); - if (!file) { - ET_LOG(Error, "couldn't load %s", tokenizer_path.c_str()); - return Error::InvalidArgument; - } - int32_t metadata[4]; - for (int i = 0; i < 4; i++) { - if (fread(metadata + i, sizeof(int32_t), 1, file) != 1) { - ET_LOG(Error, - "Failed to read the metadata at position %d, the tokenizer file " - "is not valid!", - i); - return Error::InvalidArgument; - } - } - - // now we have two vocab_sizes one from the model and another from the - // tokenizer file. 
- int32_t tokenizer_vocab_size = metadata[0]; - vocab_size_ = tokenizer_vocab_size; - bos_tok_ = metadata[1]; - eos_tok_ = metadata[2]; - max_token_length_ = metadata[3]; - - // allocate space for the vocabulary - vocab_ = std::make_unique(vocab_size_); - vocab_scores_ = std::make_unique(vocab_size_); - sorted_vocab_ = std::make_unique(vocab_size_); - - // read in the vocabulary - for (int i = 0; i < vocab_size_; i++) { - if (fread(vocab_scores_.get() + i, sizeof(float), 1, file) != 1) { - // This is allowed, we just pad the rest of the vocab with strings - std::string padding = ""; - vocab_[i] = new char[padding.length() + 1]; - strcpy(vocab_[i], padding.c_str()); - vocab_[i][padding.length()] = '\0'; - continue; - } - int32_t len; - if (fread(&len, sizeof(int32_t), 1, file) != 1) { - ET_LOG(Error, "Failed to read the length of the word at index %d", i); - return Error::InvalidArgument; - } - vocab_[i] = new char[len + 1]; - if (fread(vocab_[i], len, 1, file) != 1) { - ET_LOG(Error, "Failed to read the word, total length %d, index %d\n", len, - i); - return Error::InvalidArgument; - } - vocab_[i][len] = '\0'; // add the string terminating token - } - fclose(file); - - for (int32_t i = 0; i < vocab_size_; i++) { - sorted_vocab_[i].str = vocab_[i]; - sorted_vocab_[i].id = i; - } - qsort(sorted_vocab_.get(), vocab_size_, sizeof(TokenIndex), compare_tokens); - - initialized_ = true; - return Error::Ok; -} - -BPETokenizer::~BPETokenizer() { - for (int i = 0; i < vocab_size_; i++) { - delete[] vocab_[i]; - } -} - -/** - * @brief Decode a token into string. - * - * @param prev_token The previous token. - * @param token The current token. - * @return Result A pointer to the string representation of the - * token. 
- */ -Result BPETokenizer::decode(uint64_t prev_token, - uint64_t token) const { - ET_CHECK_OK_OR_RETURN_ERROR(Tokenizer::decode_verify(token)); - const char *piece = vocab_[token]; - // following BOS token, sentencepiece decoder strips any leading - // whitespace - if (prev_token == bos_tok_ && piece[0] == ' ') { - piece++; - } - // careful, some tokens designate raw bytes, and look like e.g. '<0x01>' - // parse this and convert and return the actual byte - unsigned char byte_val; - if (sscanf(piece, "<0x%02hhX>", &byte_val) == 1) { - piece = (char *)byte_pieces_ + byte_val * 2; - } - std::string res(piece); - return res; -} - -static int32_t str_lookup(const char *str, TokenIndex *sorted_vocab, - int32_t vocab_size) { - // efficiently find the perfect match for str in vocab, return its index or -1 - // if not found - TokenIndex tok = {.str = str}; // acts as the key to search for - TokenIndex *res = (TokenIndex *)bsearch(&tok, sorted_vocab, vocab_size, - sizeof(TokenIndex), compare_tokens); - return res != nullptr ? res->id : -1; -} - -/** - * @brief Encode a string into a sequence of tokens. - * - * @param text The string to be encoded. - * @param bos The number of BOS to prepend to the token list. - * @param eos The number of EOS to append to the token list. - * @param tokens The output tokens. - * @param n_tokens The number of tokens. 
- * @return Result> - */ -Result> -BPETokenizer::encode(const std::string &text, int8_t bos, int8_t eos) const { - if (!initialized_) { - ET_LOG(Error, "Tokenizer not initialized"); - return Error::NotSupported; - } - // encode the string text (input) into an upper-bound preallocated tokens[] - // array bos != 0 means prepend the BOS token (=1), eos != 0 means append the - // EOS token (=2) - if (text.empty()) { - ET_LOG(Error, "cannot encode empty text"); - return Error::InvalidArgument; - } - - // create a temporary buffer that will store merge candidates of always two - // consecutive tokens *2 for concat, +1 for null terminator +2 for UTF8 (in - // case max_token_length is 1) - char *str_buffer = new char[max_token_length_ * 2 + 1 + 2]; - size_t str_len = 0; - - // start at 0 tokens - std::vector tokens; - - // add optional BOS token, if desired - if (bos >= 0) { - while (bos--) { - tokens.push_back(bos_tok_); - } - } else { - ET_LOG(Error, "bos %d should be >= 0", bos); - return Error::InvalidArgument; - } - - // add_dummy_prefix is true by default - // so prepend a dummy prefix token to the input string, but only if text != "" - // TODO: pretty sure this isn't correct in the general case but I don't have - // the energy to read more of the sentencepiece code to figure out what it's - // doing - const char *space = " "; - if (text[0] != '\0') { - int dummy_prefix = str_lookup(space, sorted_vocab_.get(), vocab_size_); - tokens.push_back(dummy_prefix); - } - - // Okay UTF-8 time. This will get messy. 
Here is the reference from Wikipedia: - // Code point ↔ UTF-8 conversion - // First code point Last code point Byte 1 Byte 2 Byte 3 Byte 4 - // U+0000 U+007F 0xxxxxxx - // U+0080 U+07FF 110xxxxx 10xxxxxx - // U+0800 U+FFFF 1110xxxx 10xxxxxx 10xxxxxx - // U+10000 U+10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - - // process the raw (UTF-8) byte sequence of the input string - for (const char *c = text.c_str(); *c != '\0'; c++) { - // reset buffer if the current byte is ASCII or a leading byte - // 0xC0 is 11000000, so (*c & 0xC0) keeps the first 2 bits and zeros the - // rest 0x80 is 10000000 in UTF-8, all continuation bytes start with "10" in - // first two bits so in English this is: "if this byte is not a continuation - // byte" - if ((*c & 0xC0) != 0x80) { - // this byte must be either a leading byte (11...) or an ASCII char - // (0x...) - // => reset our location, as we're starting a new UTF-8 codepoint - str_len = 0; - } - - // append the current byte to the buffer - str_buffer[str_len++] = - *c; // ++ is post-increment, incremented after this line - str_buffer[str_len] = '\0'; - - // while the next character is a continuation byte, continue appending - // but if there are too many of them, just stop to avoid overruning - // str_buffer size. 
- if ((*(c + 1) & 0xC0) == 0x80 && str_len < 4) { - continue; - } - - // ok c+1 is not a continuation byte, so we've read in a full codepoint - int id = str_lookup(str_buffer, sorted_vocab_.get(), vocab_size_); - if (id != -1) { - // we found this codepoint in vocab, add it as a token - tokens.push_back(id); - } else { - // byte_fallback encoding: just encode each byte as a token - // +3 is here because the first 3 vocab elements are , , - // so the individual bytes only start at index 3 - for (int i = 0; i < str_len; i++) { - tokens.push_back((unsigned char)str_buffer[i] + 3); - } - } - str_len = 0; // protect against a sequence of stray UTF8 continuation bytes - } - - // merge the best consecutive pair each iteration, according the scores in - // vocab_scores - while (1) { - float best_score = -1e10; - int best_id = -1; - int best_idx = -1; - - for (int i = 0; i < tokens.size() - 1; i++) { - // check if we can merge the pair (tokens[i], tokens[i+1]) - snprintf(str_buffer, max_token_length_ * 2 + 3, "%s%s", vocab_[tokens[i]], - vocab_[tokens[i + 1]]); - int id = str_lookup(str_buffer, sorted_vocab_.get(), vocab_size_); - if (id != -1 && vocab_scores_[id] > best_score) { - // this merge pair exists in vocab! 
record its score and position - best_score = vocab_scores_[id]; - best_id = id; - best_idx = i; - } - } - - if (best_idx == -1) { - break; // we couldn't find any more pairs to merge, so we're done - } - - // merge the consecutive pair (best_idx, best_idx+1) into new token best_id - tokens[best_idx] = best_id; - // delete token at position best_idx+1, shift the entire sequence back 1 - for (int i = best_idx + 1; i < tokens.size() - 1; i++) { - tokens[i] = tokens[i + 1]; - } - tokens.pop_back(); // token length decreased - } - - // add optional EOS (=2) token, if desired - if (eos >= 0) { - while (eos--) { - tokens.push_back(eos_tok_); - } - } else { - ET_LOG(Error, "eos %d should be >= 0", eos); - return Error::InvalidArgument; - } - - delete[] str_buffer; - return Result(tokens); -} - -} // namespace llm -} // namespace extension -} // namespace executorch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h deleted file mode 100644 index b619905793..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include "tokenizer.h" -#include - -namespace executorch { -namespace extension { -namespace llm { - -struct TokenIndex { - const char *str; - int32_t id; -}; - -// A simple Byte Pair Encoding (BPE) Tokenizer. Note that the current C++ code -// won't work with this class, it needs to go through tokenizer.py first. 
-class BPETokenizer : public Tokenizer { -public: - explicit BPETokenizer(); - ~BPETokenizer() override; - - ::executorch::runtime::Error load(const std::string &tokenizer_path) override; - - ::executorch::runtime::Result> - encode(const std::string &input, int8_t bos, int8_t eos) const override; - - ::executorch::runtime::Result - decode(uint64_t prev_token, uint64_t token) const override; - -private: - std::unique_ptr vocab_ = nullptr; - std::unique_ptr vocab_scores_ = nullptr; - std::unique_ptr sorted_vocab_ = nullptr; - unsigned int max_token_length_ = 0; - unsigned char byte_pieces_[512]; // stores all single-byte strings -}; - -} // namespace llm -} // namespace extension -} // namespace executorch - -namespace torch { -namespace executor { -// TODO(T197294990): Remove these deprecated aliases once all users have moved -// to the new `::executorch` namespaces. -using ::executorch::extension::llm::BPETokenizer; -using ::executorch::extension::llm::TokenIndex; -} // namespace executor -} // namespace torch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp deleted file mode 100644 index 8bc7ef4879..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "llama_tiktoken.h" - -namespace example { - -using ::executorch::extension::llm::Tiktoken; - -namespace { -static constexpr int32_t kSpecialTokensSize = 256; -static constexpr size_t kBOSTokenIndex = 0; -static constexpr size_t kEOSTokenIndex = 1; - -static inline std::unique_ptr> -_get_default_special_tokens() { - auto special_tokens = - std::make_unique>(std::vector{ - "<|begin_of_text|>", "<|end_of_text|>", - "<|reserved_special_token_0|>", "<|reserved_special_token_1|>", - "<|finetune_right_pad_id|>", "<|step_id|>", "<|start_header_id|>", - "<|end_header_id|>", "<|eom_id|>", "<|eot_id|>", "<|python_tag|>"}); - // pad the rest of the special tokens with reserved tokens - ssize_t reserved_special_token_num = 2; - while (special_tokens->size() < kSpecialTokensSize) { - special_tokens->emplace_back("<|reserved_special_token_" + - std::to_string(reserved_special_token_num++) + - "|>"); - } - return special_tokens; -} - -static inline std::unique_ptr> -_get_multimodal_special_tokens() { - auto special_tokens = - std::make_unique>(std::vector{ - "<|begin_of_text|>", "<|end_of_text|>", - "<|reserved_special_token_0|>", "<|reserved_special_token_1|>", - "<|reserved_special_token_2|>", "<|reserved_special_token_3|>", - "<|start_header_id|>", "<|end_header_id|>", "<|eom_id|>", - "<|eot_id|>", "<|image|>"}); - - // pad the rest of the special tokens with reserved tokens except the last - // one - ssize_t reserved_special_token_num = 4; - while (special_tokens->size() < kSpecialTokensSize - 1) { - special_tokens->emplace_back("<|reserved_special_token_" + - std::to_string(reserved_special_token_num++) + - "|>"); - } - - special_tokens->emplace_back("<|python_tag|>"); - - return special_tokens; -} - -std::unique_ptr> _get_special_tokens(Version version) { - switch (version) { - case Version::Multimodal: - return _get_multimodal_special_tokens(); - default: - return _get_default_special_tokens(); - } -} - -} // namespace - -std::unique_ptr 
get_tiktoken_for_llama(Version version) { - return std::make_unique(_get_special_tokens(version), - kBOSTokenIndex, kEOSTokenIndex); -} - -} // namespace example diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h deleted file mode 100644 index 10e106f116..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include "tiktoken.h" - -namespace example { - -enum class Version { - Default, - Multimodal, -}; - -std::unique_ptr<::executorch::extension::llm::Tiktoken> -get_tiktoken_for_llama(Version version = Version::Default); - -} // namespace example diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp deleted file mode 100644 index aad4de6f75..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -// Adopted from https://github.com/sewenew/tokenizer - -// @lint-ignore-every LICENSELINT -/************************************************************************** - Copyright (c) 2023 sewenew - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - *************************************************************************/ - -#include "tiktoken.h" -#include "base64.h" -#include -#include -#include - -using ::executorch::runtime::Error; -using ::executorch::runtime::Result; - -namespace executorch { -namespace extension { -namespace llm { - -// ------------------------------Util start------------------------------------ - -static uint64_t _max_size() { return std::numeric_limits::max(); } - -static Re2UPtr _create_regex(const std::string &pattern) { - assert(!pattern.empty()); - - return std::make_unique("(" + pattern + ")"); -} - -static Re2UPtr _build_special_token_regex(const Encoder &special_encoder) { - std::string special_pattern; - for (const auto &ele : special_encoder) { - if (!special_pattern.empty()) { - special_pattern += "|"; - } - special_pattern += re2::RE2::QuoteMeta(ele.first); - } - - if (special_pattern.empty()) { - return nullptr; - } - - return _create_regex(special_pattern); -} - -static Result> -_parse(const std::string &line) { - // Tiktoken format - // https://github.com/openai/tiktoken/blob/main/tiktoken/load.py#L140 - auto pos = line.find(" "); - ET_CHECK_OR_RETURN_ERROR(pos != std::string::npos, InvalidArgument, - "invalid tiktoken line: %s", 
line.c_str()); - - auto token = ET_UNWRAP(base64::decode({line.data(), pos})); - uint64_t rank = 0; - try { - rank = std::stoul(line.substr(pos + 1)); - } catch (const std::exception &) { - ET_CHECK_OR_RETURN_ERROR(false, InvalidArgument, "invalid encoder rank: %s", - line.c_str()); - } - - return std::pair{std::move(token), rank}; -} - -static Result _load_encoder(const std::string &path) { - std::ifstream file(path); - ET_CHECK_OR_RETURN_ERROR(file, InvalidArgument, - "failed to open encoder file: %s", path.c_str()); - - Encoder encoder; - std::string line; - while (std::getline(file, line)) { - auto [token, rank] = ET_UNWRAP(_parse(line)); - - ET_CHECK_OR_RETURN_ERROR(encoder.emplace(std::move(token), rank).second, - InvalidArgument, "duplicate item: %s", - line.c_str()); - } - - return encoder; -} - -static Result _build_decoder(const Encoder &encoder) { - Decoder decoder; - for (const auto &[k, v] : encoder) { - decoder.emplace(v, k); - } - - ET_CHECK_OR_RETURN_ERROR(encoder.size() == decoder.size(), InvalidArgument, - "duplicate items in encoder"); - - return decoder; -} - -static std::vector -_byte_pair_merge(const std::string &piece, - const std::unordered_map &ranks, - std::function func) { - // This is a vector of (start, rank). - // The rank is of the byte pair starting at position start. - // The rank of the last item in the vector is not a valid value. 
- std::vector> parts; - parts.reserve(piece.size() + 1); - for (auto idx = 0U; idx < piece.size() + 1; ++idx) { - parts.emplace_back(idx, _max_size()); - } - - auto get_rank = - [&piece, &ranks](const std::vector> &parts, - uint64_t start_idx, - uint64_t skip) -> std::optional { - if (start_idx + skip + 2 < parts.size()) { - auto s = parts[start_idx].first; - auto e = parts[start_idx + skip + 2].first; - auto key = piece.substr(s, e - s); - auto iter = ranks.find(key); - if (iter != ranks.end()) { - return iter->second; - } - } - return std::nullopt; - }; - - // We look up the ranks once in the beginning and iteratively update - // them during each merge, which reduces the number of rank lookups. - for (auto i = 0U; i < parts.size() - 2; ++i) { - auto rank = get_rank(parts, i, 0); - if (rank) { - // usize::MAX is a sentinel value and cannot be a valid rank - ET_CHECK_MSG(*rank != _max_size(), "rank is too large"); - parts[i].second = *rank; - } - } - - // If you have n parts and m merges, this does O(mn) work. - // We could do something with a heap and do O(m log n) work. - // It is important to consider that n is often small (<100), and as such - // the cache-locality benefits outweigh the algorithmic complexity downsides - // of the `parts` vector data structure above. - - // Note that we hash bytes, not token pairs. As long as we train BPE the way - // we currently do, this is equivalent. An easy way to break this would be - // to decouple merge priority from token index or to prevent specific token - // merges. 
- while (true) { - if (parts.size() == 1) { - break; - } - - // usize::MAX is a sentinel rank value allowing us to - // take the min more quickly - auto min_rank = std::make_pair(_max_size(), 0); - for (auto i = 0U; i < parts.size() - 1; ++i) { - auto rank = parts[i].second; - if (rank < min_rank.first) { - min_rank.first = rank; - min_rank.second = i; - } - } - - if (min_rank.first != _max_size()) { - auto i = min_rank.second; - - // NOTE: We are about to remove parts[i + 1]. We do not do it - // yet because there are cache-locality benefits to updating - // parts[i] and parts[i-1] before removing, which could thrash - // the cache. Thus, we update the rank calculation by skipping over - // parts[i + 1], by invoking `get_rank!` with `skip = 1`. - auto rank = get_rank(parts, i, 1); - if (rank) { - parts[i].second = *rank; - } else { - parts[i].second = _max_size(); - } - if (i > 0) { - rank = get_rank(parts, i - 1, 1); - if (rank) { - parts[i - 1].second = *rank; - } else { - parts[i - 1].second = _max_size(); - } - } - - parts.erase(parts.begin() + (i + 1)); - } else { - break; - } - } - std::vector out; - out.reserve(parts.size() - 1); - for (auto i = 0U; i < parts.size() - 1; ++i) { - auto s = parts[i].first; - auto e = parts[i + 1].first; - out.push_back(func(s, e)); - } - return out; -} - -static std::vector _byte_pair_encode(const std::string &piece, - const Encoder &encoder) { - if (piece.size() == 1) { - auto iter = encoder.find(piece); - if (iter != encoder.end()) { - return std::vector({iter->second}); - } else { - // TODO: is it possible? - return {}; - } - } - - return _byte_pair_merge(piece, encoder, - [&piece, &encoder](uint64_t start, uint64_t stop) { - std::string key = piece.substr(start, stop - start); - auto iter = encoder.find(key); - if (iter != encoder.end()) { - return iter->second; - } else { - // TODO: what if key does not exist? Should we - // return `unknown`? assert(false); // ?? 
- return uint64_t(0); - } - }); -} -// ------------------------------Util end------------------------------------ -// -------------------------private method start------------------------------- - -template -std::pair, re2::StringPiece> -Tiktoken::_split_with_allowed_special_token(re2::StringPiece &input, - const T &allowed_special) const { - if (!_special_token_regex) { - return std::make_pair(std::nullopt, input); - } - -#if __cplusplus >= 202002L - auto start = input.begin(); -#else - const char *start = input.data(); -#endif - std::string special; - while (true) { - if (!re2::RE2::FindAndConsume(&input, *_special_token_regex, &special)) { - // No special token. - break; - } - - if (allowed_special.count(special) == 1) { - // Found an allowed special token, split the text with it. -#if __cplusplus >= 202002L - return std::make_pair( - special, - re2::StringPiece(start, input.begin() - start - special.size())); -#else - return std::make_pair( - special, - re2::StringPiece(start, (input.data() - start) - special.size())); -#endif - } // else try to find the next special token - } - - return std::make_pair(std::nullopt, input); -} - -void Tiktoken::_encode(re2::StringPiece &input, std::vector &ret, - uint64_t &last_piece_token_len) const { - std::string piece; - assert(_regex); - while (re2::RE2::FindAndConsume(&input, *_regex, &piece)) { - auto iter = _encoder.find(piece); - if (iter != _encoder.end()) { - last_piece_token_len = 1; - ret.push_back(iter->second); - continue; - } - auto tokens = _byte_pair_encode(piece, _encoder); - last_piece_token_len = tokens.size(); - ret.insert(ret.end(), tokens.begin(), tokens.end()); - } -} - -template -std::pair, uint64_t> -Tiktoken::_encode_with_special_token(const std::string &text, - const T &allowed_special) const { - std::vector tokens; - uint64_t last_piece_token_len = 0; - re2::StringPiece input(text); - while (true) { - auto [special, sub_input] = - _split_with_allowed_special_token(input, allowed_special); - - 
_encode(sub_input, tokens, last_piece_token_len); - - if (special) { - uint64_t token = 0; - try { - token = _special_token_encoder.at(*special); - } catch (const std::out_of_range &) { - // Should never go here, since special pattern includes all special - // chars. - ET_CHECK_MSG(false, "unknown special token: %s", special->c_str()); - } - - tokens.push_back(token); - last_piece_token_len = 0; - } else { - break; - } - } - - // last_piece_token_len is how many tokens came from the last regex split. - // This is used for determining unstable tokens, since you can't merge - // across (stable) regex splits - return std::make_pair(tokens, last_piece_token_len); -} - -Encoder Tiktoken::_build_special_token_encoder(ssize_t num_base_tokens) const { - Encoder special_token_encoder; - for (ssize_t i = 0; i < _special_tokens->size(); ++i) { - special_token_encoder.emplace(_special_tokens->at(i), num_base_tokens + i); - } - return special_token_encoder; -} - -// -------------------------private method end------------------------------- -// -------------------------public method start------------------------------- - -Tiktoken::Tiktoken(std::unique_ptr> special_tokens, - size_t bos_token_index, size_t eos_token_index) - : Tokenizer(), _special_tokens(std::move(special_tokens)), - _bos_token_index(bos_token_index), _eos_token_index(eos_token_index) { - ET_CHECK_MSG(_bos_token_index < _special_tokens->size(), - "invalid bos_token_index %zu", _bos_token_index); - ET_CHECK_MSG(_eos_token_index < _special_tokens->size(), - "invalid eos_token_index %zu", _eos_token_index); -} - -Error Tiktoken::load(const std::string &path) { - _encoder = ET_UNWRAP(_load_encoder(path)); - _special_token_encoder = _build_special_token_encoder(_encoder.size()); - - _decoder = ET_UNWRAP(_build_decoder(_encoder)); - _special_token_decoder = ET_UNWRAP(_build_decoder(_special_token_encoder)); - - _regex = _create_regex(_pattern); - // Warmup re2 as it is slow on the first run, void the return value as 
it's - // not needed Refer to - // https://github.com/google/re2/blob/6dcd83d60f7944926bfd308cc13979fc53dd69ca/re2/fuzzing/re2_fuzzer.cc#L136-L141 - (void)_regex->ReverseProgramSize(); - - _special_token_regex = _build_special_token_regex(_special_token_encoder); - // Same as above, warm up re2 - (void)_special_token_regex->ReverseProgramSize(); - - // initialize vocab_size, bos_tok, eos_tok - vocab_size_ = _encoder.size() + _special_token_encoder.size(); - bos_tok_ = _special_token_encoder.at(_special_tokens->at(_bos_token_index)); - eos_tok_ = _special_token_encoder.at(_special_tokens->at(_eos_token_index)); - - initialized_ = true; - return Error::Ok; -} - -Result> Tiktoken::encode(const std::string &text, - int8_t bos, int8_t eos) const { - if (!initialized_) { - return Error::NotSupported; - } - auto res = _encode_with_special_token(text, _special_token_encoder).first; - for (auto i = 0; i < bos; ++i) { - res.insert(res.begin(), bos_tok_); - } - for (auto i = 0; i < eos; ++i) { - res.push_back(eos_tok_); - } - return Result>(std::move(res)); -} - -Result Tiktoken::decode(uint64_t prev, uint64_t cur) const { - (void)prev; - ET_CHECK_OK_OR_RETURN_ERROR(Tokenizer::decode_verify(cur)); - std::string ret; - - std::string token_bytes; - auto iter = _decoder.find(cur); - if (iter != _decoder.end()) { - token_bytes = iter->second; - } else { - iter = _special_token_decoder.find(cur); - if (iter != _special_token_decoder.end()) { - token_bytes = iter->second; - } else { - ET_CHECK_MSG(false, "unknown token: %" PRIu64, cur); - } - } - ret += token_bytes; - - return ret; -} -// -------------------------public method end------------------------------- - -} // namespace llm -} // namespace extension -} // namespace executorch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h deleted file mode 100644 index 
5eed7e94c8..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include "tokenizer.h" -#include -#include -#include -#include - -namespace executorch { -namespace extension { -namespace llm { - -using Encoder = std::unordered_map; -using Decoder = std::unordered_map; -using Re2UPtr = std::unique_ptr; - -class Tiktoken : public Tokenizer { -public: - /** - * @param[in] special_tokens List of special tokens including bos, eos; - * @param[in] bos_token_index Index of the bos token in special_tokens; - * @param[in] eos_token_index Index of the eos token in special_tokens. - */ - explicit Tiktoken(std::unique_ptr> special_tokens, - size_t bos_token_index, size_t eos_token_index); - - ::executorch::runtime::Error load(const std::string &tokenizer_path) override; - - ::executorch::runtime::Result> - encode(const std::string &input, int8_t bos, int8_t eos) const override; - - ::executorch::runtime::Result - decode(uint64_t prev_token, uint64_t token) const override; - -private: - template - std::pair, re2::StringPiece> - _split_with_allowed_special_token(re2::StringPiece &input, - const T &allowed_special) const; - - void _encode(re2::StringPiece &input, std::vector &ret, - uint64_t &last_piece_token_len) const; - - template - std::pair, uint64_t> - _encode_with_special_token(const std::string &text, - const T &allowed_special) const; - - Encoder _build_special_token_encoder(ssize_t num_base_tokens) const; - - std::unique_ptr> _special_tokens; - size_t _bos_token_index; - size_t _eos_token_index; - // Removed negative lookahead \s+(?!\S) since it's not supported by RE2. 
- const std::string _pattern = - R"((?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+)"; - Encoder _encoder; - Encoder _special_token_encoder; - Decoder _decoder; - Decoder _special_token_decoder; - - Re2UPtr _regex; - Re2UPtr _special_token_regex; -}; - -} // namespace llm -} // namespace extension -} // namespace executorch - -namespace torch { -namespace executor { -// TODO(T197294990): Remove these deprecated aliases once all users have moved -// to the new `::executorch` namespaces. -using ::executorch::extension::llm::Decoder; -using ::executorch::extension::llm::Encoder; -using ::executorch::extension::llm::Re2UPtr; -using ::executorch::extension::llm::Tiktoken; -} // namespace executor -} // namespace torch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h deleted file mode 100644 index 948cccc0d7..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -// patternlint-disable-next-line executorch-cpp-nostdinc -#include -// patternlint-disable-next-line executorch-cpp-nostdinc -#include - -#include -#include - -namespace executorch { -namespace extension { -namespace llm { - -// A tokenizer interface. 
-class Tokenizer { -public: - explicit Tokenizer() {} - virtual ~Tokenizer() {} - - virtual ::executorch::runtime::Error - load(const std::string &tokenizer_path) = 0; - - virtual ::executorch::runtime::Result> - encode(const std::string &input, int8_t bos, int8_t eos) const = 0; - - ::executorch::runtime::Error decode_verify(uint64_t token) const { - if (!initialized_) { - ET_LOG(Error, "Tokenizer not initialized"); - return ::executorch::runtime::Error::NotSupported; - } - if (token >= vocab_size_) { - ET_LOG(Error, "token %" PRIu64 " is out side of vacab range %d", token, - vocab_size_); - return ::executorch::runtime::Error::NotSupported; - } - return ::executorch::runtime::Error::Ok; - } - - virtual ::executorch::runtime::Result - decode(uint64_t prev_token, uint64_t token) const = 0; - - // getters - int32_t vocab_size() const { return vocab_size_; } - - uint64_t bos_tok() const { return bos_tok_; } - - uint64_t eos_tok() const { return eos_tok_; } - -protected: - bool initialized_ = false; - int32_t vocab_size_ = 0; - uint64_t bos_tok_ = 0; - uint64_t eos_tok_ = 0; -}; - -} // namespace llm -} // namespace extension -} // namespace executorch - -namespace torch { -namespace executor { -// TODO(T197294990): Remove these deprecated aliases once all users have moved -// to the new `::executorch` namespaces. -using ::executorch::extension::llm::Tokenizer; -} // namespace executor -} // namespace torch