Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
3ced9e2
feat: add static libs & include for tokenizers-cpp
chmjkb Jun 13, 2025
78a1e98
fix: add object detection to installer check
chmjkb Jun 13, 2025
7844378
chore: add TokenizerModule to RnExecutorchInstaller
chmjkb Jun 13, 2025
f522ba2
wip
chmjkb Jun 13, 2025
b676862
wip
chmjkb Jun 13, 2025
c5acf97
fix: register TokenizerModule constructor & fix concept usages
chmjkb Jun 13, 2025
93ba4a6
fix: implement getMemoryLowerBound
chmjkb Jun 16, 2025
6db7d24
fix: remove redundant shared pointer usage within et bindings
chmjkb Jun 16, 2025
0ed7ea5
feat: Add remaining Tokenizers method implementations to c++
chmjkb Jun 16, 2025
dc57ff0
feat: implement remaining tokenizer methods in TS api
chmjkb Jun 16, 2025
b6d51d7
chore: remove consts
chmjkb Jun 16, 2025
73e6058
chore: remove redundant includes in StyleTransfer.cpp
chmjkb Jun 16, 2025
f85f5cd
chore: remove redundant loge, call std::move() when forwarding args t…
chmjkb Jun 16, 2025
3132dbf
wip: tokenizer hook refactor
chmjkb Jun 16, 2025
f9baa74
fix: replace old tokenizermodule with the new one, make changes in st…
chmjkb Jun 17, 2025
89d1ee1
chore: make tokenizer private
chmjkb Jun 17, 2025
9ebcfed
chore: fix return types in native code, add const / const noexcept
chmjkb Jun 17, 2025
f56b454
refactor: move repeated code to a function
chmjkb Jun 17, 2025
e46e795
refactor: make it possible to pass methodname to ensureTokenizerLoaded
chmjkb Jun 17, 2025
b55947f
fix: export all the TokenizerModule methods to JS & add a specific co…
chmjkb Jun 17, 2025
75f5ede
chore: use list initialization in TokenizerModule constructor
chmjkb Jun 17, 2025
4e5ba99
refactor: 🧹
chmjkb Jun 17, 2025
f5c9001
chore: make TokenizerModule constructor explicit
chmjkb Jun 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "RnExecutorchInstaller.h"

#include <rnexecutorch/TokenizerModule.h>
#include <rnexecutorch/host_objects/JsiConversions.h>
#include <rnexecutorch/models/classification/Classification.h>
#include <rnexecutorch/models/image_segmentation/ImageSegmentation.h>
Expand Down Expand Up @@ -42,6 +43,11 @@ void RnExecutorchInstaller::injectJSIBindings(
*jsiRuntime, "loadExecutorchModule",
RnExecutorchInstaller::loadModel<BaseModel>(jsiRuntime, jsCallInvoker,
"loadExecutorchModule"));

jsiRuntime->global().setProperty(
*jsiRuntime, "loadTokenizerModule",
RnExecutorchInstaller::loadModel<TokenizerModule>(
jsiRuntime, jsCallInvoker, "loadTokenizerModule"));
}

} // namespace rnexecutorch
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ REGISTER_CONSTRUCTOR(ObjectDetection, std::string,
std::shared_ptr<react::CallInvoker>);
REGISTER_CONSTRUCTOR(BaseModel, std::string,
std::shared_ptr<react::CallInvoker>);
REGISTER_CONSTRUCTOR(TokenizerModule, std::string,
std::shared_ptr<react::CallInvoker>);

using namespace facebook;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#include "TokenizerModule.h"
#include <executorch/extension/module/module.h>
#include <filesystem>
#include <rnexecutorch/Log.h>
#include <rnexecutorch/data_processing/FileUtils.h>

namespace rnexecutorch {
using namespace facebook;

// Loads a tokenizer from the file at `source` (a JSON blob — presumably a
// HuggingFace tokenizer.json, per tokenizers-cpp FromBlobJSON; confirm).
// `callInvoker` is accepted to match the shared REGISTER_CONSTRUCTOR
// signature but is not used by this module.
// NOTE(review): std::filesystem::file_size throws if `source` does not
// exist, so a bad path fails construction before the tokenizer is parsed.
// NOTE(review): members are initialized in declaration order (`tokenizer`
// before `memorySizeLowerBound`), not init-list order; both read only
// `source`, so no ordering hazard today.
TokenizerModule::TokenizerModule(
    std::string source, std::shared_ptr<react::CallInvoker> callInvoker)
    : memorySizeLowerBound(std::filesystem::file_size(source)),
      tokenizer(tokenizers::Tokenizer::FromBlobJSON(
          fileutils::loadBytesFromFile(source))) {}

// Guard shared by every public method: throws std::runtime_error when the
// underlying tokenizer pointer is null. `methodName` is interpolated into
// the error text so the JS caller can see which call failed.
void TokenizerModule::ensureTokenizerLoaded(
    const std::string &methodName) const {
  if (tokenizer) {
    return;
  }
  throw std::runtime_error(
      methodName + " function was called on an uninitialized tokenizer!");
}

// Tokenizes `text` into a sequence of token ids via tokenizers-cpp.
// Throws std::runtime_error if the tokenizer failed to load.
std::vector<int32_t> TokenizerModule::encode(std::string text) const {
  ensureTokenizerLoaded("encode");
  auto tokenIds = tokenizer->Encode(text);
  return tokenIds;
}

// Converts token ids back into text. `skipSpecialTokens` is forwarded to
// tokenizers-cpp Decode (drops special markers from the output).
// Throws std::runtime_error if the tokenizer failed to load.
std::string TokenizerModule::decode(std::vector<int32_t> tokenIds,
                                    bool skipSpecialTokens) const {
  ensureTokenizerLoaded("decode");
  auto text = tokenizer->Decode(tokenIds, skipSpecialTokens);
  return text;
}

// Returns the vocabulary size reported by the underlying tokenizer.
// Throws std::runtime_error if the tokenizer failed to load.
size_t TokenizerModule::getVocabSize() const {
  ensureTokenizerLoaded("getVocabSize");
  auto vocabSize = tokenizer->GetVocabSize();
  return vocabSize;
}

// Maps a single token id to its surface string.
// Throws std::runtime_error if the tokenizer failed to load.
std::string TokenizerModule::idToToken(int32_t tokenId) const {
  ensureTokenizerLoaded("idToToken");
  auto token = tokenizer->IdToToken(tokenId);
  return token;
}

int32_t TokenizerModule::tokenToId(std::string token) const {
ensureTokenizerLoaded("tokenToId");
return tokenizer->TokenToId(token);
}
// Reports a lower bound on native memory held by this module: the byte size
// of the tokenizer file, captured once in the constructor. Safe to call on
// an uninitialized tokenizer (no guard needed), hence noexcept.
std::size_t TokenizerModule::getMemoryLowerBound() const noexcept {
  return memorySizeLowerBound;
}
} // namespace rnexecutorch
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#pragma once

#include <ReactCommon/CallInvoker.h>
#include <tokenizers-cpp/tokenizers_cpp.h>

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

namespace rnexecutorch {
using namespace facebook;

// JSI-exposed wrapper around a tokenizers-cpp tokenizer loaded from a file.
// Instantiated from JS via loadTokenizerModule (RnExecutorchInstaller) and
// registered through REGISTER_CONSTRUCTOR. Every tokenizer-backed method
// throws std::runtime_error when the underlying tokenizer failed to load.
class TokenizerModule {
public:
  // `source`: path to the tokenizer JSON blob. `callInvoker`: accepted to
  // match the common module-constructor signature; not used by this class.
  explicit TokenizerModule(std::string source,
                           std::shared_ptr<react::CallInvoker> callInvoker);
  // Tokenizes text into token ids.
  std::vector<int32_t> encode(std::string s) const;
  // Converts token ids back to text; `skipSpecialTokens` is forwarded to
  // tokenizers-cpp Decode.
  std::string decode(std::vector<int32_t> vec, bool skipSpecialTokens) const;
  std::string idToToken(int32_t tokenId) const;
  int32_t tokenToId(std::string token) const;
  std::size_t getVocabSize() const;
  // Lower bound on native memory held: the tokenizer file size measured at
  // construction. noexcept — does not require a loaded tokenizer.
  std::size_t getMemoryLowerBound() const noexcept;

private:
  // Throws std::runtime_error naming `methodName` if `tokenizer` is null.
  void ensureTokenizerLoaded(const std::string &methodName) const;
  std::unique_ptr<tokenizers::Tokenizer> tokenizer;
  // Byte size of the source file; reported via getMemoryLowerBound().
  const std::size_t memorySizeLowerBound{0};
};
} // namespace rnexecutorch
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#pragma once

#include <chrono>
#include <filesystem>
#include <fstream>
#include <string>

namespace rnexecutorch::fileutils {
Expand All @@ -11,4 +13,18 @@ inline std::string getTimeID() {
.count());
}

// Reads the entire contents of the file at `path` into a string (binary,
// no newline translation).
// Throws std::runtime_error if the file cannot be opened, sized, or fully
// read. The error message includes the path to aid debugging.
inline std::string loadBytesFromFile(const std::string &path) {
  std::ifstream fs(path, std::ios::in | std::ios::binary);
  if (fs.fail()) {
    // Generic wording: this helper is not tokenizer-specific.
    throw std::runtime_error("Failed to open file: " + path);
  }
  fs.seekg(0, std::ios::end);
  const auto endPos = fs.tellg();
  if (endPos == std::ifstream::pos_type(-1)) {
    throw std::runtime_error("Failed to determine size of file: " + path);
  }
  fs.seekg(0, std::ios::beg);
  std::string data(static_cast<size_t>(endPos), '\0');
  fs.read(data.data(), static_cast<std::streamsize>(data.size()));
  // Detect short reads instead of silently returning zero-padded data.
  if (fs.gcount() != static_cast<std::streamsize>(data.size())) {
    throw std::runtime_error("Failed to read file: " + path);
  }
  return data;
}

} // namespace rnexecutorch::fileutils
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,21 @@ inline std::string getValue<std::string>(const jsi::Value &val,
return val.getString(runtime).utf8(runtime);
}

// Specialization: converts a JSI array of numbers into std::vector<int32_t>.
// Each element is converted through the int32_t getValue specialization;
// throws (via JSI) if `val` is not an array.
template <>
inline std::vector<int32_t>
getValue<std::vector<int32_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
  jsi::Array jsArray = val.asObject(runtime).asArray(runtime);
  const size_t count = jsArray.size(runtime);
  std::vector<int32_t> out;
  out.reserve(count);
  for (size_t idx = 0; idx < count; ++idx) {
    jsi::Value item = jsArray.getValueAtIndex(runtime, idx);
    out.push_back(getValue<int32_t>(item, runtime));
  }
  return out;
}

template <>
inline JSTensorViewIn getValue<JSTensorViewIn>(const jsi::Value &val,
jsi::Runtime &runtime) {
Expand Down Expand Up @@ -182,20 +197,18 @@ getJsiValue(const std::vector<std::shared_ptr<OwningArrayBuffer>> &vec,
return jsi::Value(runtime, array);
}

inline jsi::Value
getJsiValue(const std::vector<std::shared_ptr<JSTensorViewOut>> &vec,
jsi::Runtime &runtime) {
inline jsi::Value getJsiValue(const std::vector<JSTensorViewOut> &vec,
jsi::Runtime &runtime) {
jsi::Array array(runtime, vec.size());
for (size_t i = 0; i < vec.size(); i++) {
jsi::Object tensorObj(runtime);

tensorObj.setProperty(runtime, "sizes",
getJsiValue(vec[i]->sizes, runtime));
tensorObj.setProperty(runtime, "sizes", getJsiValue(vec[i].sizes, runtime));

tensorObj.setProperty(runtime, "scalarType",
jsi::Value(static_cast<int>(vec[i]->scalarType)));
jsi::Value(static_cast<int>(vec[i].scalarType)));

jsi::ArrayBuffer arrayBuffer(runtime, vec[i]->dataPtr);
jsi::ArrayBuffer arrayBuffer(runtime, vec[i].dataPtr);
tensorObj.setProperty(runtime, "dataPtr", arrayBuffer);

array.setValueAtIndex(runtime, i, tensorObj);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#include <ReactCommon/CallInvoker.h>

#include <rnexecutorch/Log.h>
#include <rnexecutorch/TokenizerModule.h>
#include <rnexecutorch/host_objects/JSTensorViewOut.h>
#include <rnexecutorch/host_objects/JsiConversions.h>
#include <rnexecutorch/jsi/JsiHostObject.h>
Expand Down Expand Up @@ -45,6 +45,31 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
promiseHostFunction<&Model::generate>,
"generate"));
}

if constexpr (meta::HasEncode<Model>) {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::encode>,
"encode"));
}

if constexpr (meta::SameAs<Model, TokenizerModule>) {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::encode>,
"encode"));

addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::decode>,
"decode"));
addFunctions(JSI_EXPORT_FUNCTION(
ModelHostObject<Model>, promiseHostFunction<&Model::getVocabSize>,
"getVocabSize"));
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::idToToken>,
"idToToken"));
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::tokenToId>,
"tokenToId"));
}
}

// A generic host function that resolves a promise with a result of a
Expand Down Expand Up @@ -76,8 +101,8 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
std::thread([this, promise,
argsConverted = std::move(argsConverted)]() {
try {
auto result =
std::apply(std::bind_front(FnPtr, model), argsConverted);
auto result = std::apply(std::bind_front(FnPtr, model),
std::move(argsConverted));
// The result is copied. It should either be quickly copiable,
// or passed with a shared_ptr.
callInvoker->invokeAsync([promise,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ constexpr std::size_t getArgumentCount(R (Model::*f)(Types...)) {
return sizeof...(Types);
}

// Overload for const member functions: yields the arity (number of declared
// parameters) of the pointed-to method at compile time. The pointer value
// itself is never used, so the parameter is left unnamed.
template <typename Model, typename R, typename... Types>
constexpr std::size_t getArgumentCount(R (Model::*)(Types...) const) {
  return sizeof...(Types);
}

template <typename... Types, std::size_t... I>
std::tuple<Types...> fillTupleFromArgs(std::index_sequence<I...>,
const jsi::Value *args,
Expand All @@ -34,4 +39,12 @@ std::tuple<Types...> createArgsTupleFromJsi(R (Model::*f)(Types...),
return fillTupleFromArgs<Types...>(std::index_sequence_for<Types...>{}, args,
runtime);
}

// Overload for const member functions: builds a std::tuple<Types...> by
// converting each incoming JSI argument to the corresponding parameter type
// of the pointed-to method. The function pointer `f` is used only for type
// deduction; `args` must hold at least sizeof...(Types) values (callers are
// expected to validate the count via getArgumentCount beforehand —
// TODO confirm at the call site).
template <typename Model, typename R, typename... Types>
std::tuple<Types...> createArgsTupleFromJsi(R (Model::*f)(Types...) const,
                                            const jsi::Value *args,
                                            jsi::Runtime &runtime) {
  return fillTupleFromArgs<Types...>(std::index_sequence_for<Types...>{}, args,
                                     runtime);
}
} // namespace rnexecutorch::meta
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,24 @@ namespace rnexecutorch::meta {
// True when T is Base itself or (possibly indirectly) derives from it.
template <typename T, typename Base>
concept DerivedFromOrSameAs = std::is_base_of_v<Base, T>;

// True only when T is exactly Base — no derived-to-base allowance, unlike
// DerivedFromOrSameAs above.
template <typename T, typename Base>
concept SameAs = std::is_same_v<Base, T>;

// Detection concepts: satisfied when T declares a member named `generate` /
// `encode` / `decode` whose address can be taken (fails for missing or
// overloaded members). Used to gate conditional JSI method registration.
template <typename T>
concept HasGenerate = requires(T t) {
  { &T::generate };
};

template <typename T>
concept HasEncode = requires(T t) {
  { &T::encode };
};

template <typename T>
concept HasDecode = requires(T t) {
  { &T::decode };
};

// True for built-in arithmetic types (integral and floating point).
template <typename T>
concept IsNumeric = std::is_arithmetic_v<T>;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ BaseModel::getAllInputShapes(std::string methodName) {
return output;
}

std::vector<std::shared_ptr<JSTensorViewOut>>
std::vector<JSTensorViewOut>
BaseModel::forwardJS(const std::vector<JSTensorViewIn> tensorViewVec) {
if (!module) {
throw std::runtime_error("Model not loaded: Cannot perform forward pass");
Expand Down Expand Up @@ -114,7 +114,7 @@ BaseModel::forwardJS(const std::vector<JSTensorViewIn> tensorViewVec) {
}

auto &outputs = result.get();
std::vector<std::shared_ptr<JSTensorViewOut>> output;
std::vector<JSTensorViewOut> output;
output.reserve(outputs.size());

// Convert ET outputs to a vector of JSTensorViewOut which are later
Expand All @@ -125,8 +125,7 @@ BaseModel::forwardJS(const std::vector<JSTensorViewIn> tensorViewVec) {
size_t bufferSize = outputTensor.numel() * outputTensor.element_size();
auto buffer = std::make_shared<OwningArrayBuffer>(bufferSize);
std::memcpy(buffer->data(), outputTensor.const_data_ptr(), bufferSize);
auto jsTensor = std::make_shared<JSTensorViewOut>(
sizes, outputTensor.scalar_type(), buffer);
auto jsTensor = JSTensorViewOut(sizes, outputTensor.scalar_type(), buffer);
output.emplace_back(jsTensor);
}
return output;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class BaseModel {
std::vector<int32_t> getInputShape(std::string method_name, int index);
std::vector<std::vector<int32_t>>
getAllInputShapes(std::string methodName = "forward");
std::vector<std::shared_ptr<JSTensorViewOut>>
std::vector<JSTensorViewOut>
forwardJS(std::vector<JSTensorViewIn> tensorViewVec);

protected:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
#include "StyleTransfer.h"

#include <rnexecutorch/Log.h>
#include <rnexecutorch/data_processing/ImageProcessing.h>

#include <span>

#include <executorch/extension/tensor/tensor.h>
#include <opencv2/opencv.hpp>

Expand Down Expand Up @@ -42,7 +39,7 @@ std::string StyleTransfer::postprocess(const Tensor &tensor,
}

std::string StyleTransfer::generate(std::string imageSource) {
auto [inputTensor, originalSize] =
auto [inputTensor, originalSize] =
imageprocessing::readImageToTensor(imageSource, getAllInputShapes()[0]);

auto forwardResult = BaseModel::forward(inputTensor);
Expand Down
Loading