44#include < filesystem>
55#include < map>
66#include < rnexecutorch/Error.h>
7- #include < rnexecutorch/Log.h>
87#include < rnexecutorch/threads/GlobalThreadPool.h>
98#include < runner/encoders/audio_encoder.h>
109#include < runner/encoders/vision_encoder.h>
@@ -22,7 +21,6 @@ LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
2221 std::vector<std::string> capabilities,
2322 std::shared_ptr<react::CallInvoker> callInvoker)
2423 : BaseModel(modelSource, callInvoker, Module::LoadMode::Mmap) {
25-
2624 if (capabilities.empty ()) {
2725 runner_ =
2826 std::make_unique<llm::TextRunner>(std::move (module_), tokenizerSource);
@@ -72,96 +70,19 @@ std::string LLM::generate(std::string input,
7270
7371 auto config = llm::GenerationConfig{.echo = false , .warming = false };
7472 auto error = runner_->generate (input, config, nativeCallback, {});
73+ // No-op unless built with ET_EVENT_TRACER_ENABLED. Writes etdump.bin
74+ // alongside the model after the generation finishes.
75+ dumpEventTracer ();
7576 if (error != Error::Ok) {
7677 throw RnExecutorchError (error, " Failed to generate text" );
7778 }
7879 return output;
7980}
8081
81- std::string LLM::generateMultimodal (std::string prompt,
82- std::vector<std::string> imagePaths,
83- std::string imageToken,
84- std::shared_ptr<jsi::Function> callback) {
85- if (!runner_ || !runner_->is_loaded ()) {
86- throw RnExecutorchError (RnExecutorchErrorCode::ModuleNotLoaded,
87- " Runner is not loaded" );
88- }
89- if (!runner_->is_multimodal ()) {
90- throw RnExecutorchError (
91- RnExecutorchErrorCode::InvalidUserInput,
92- " This model does not support multimodal input. Use generate(prompt, "
93- " callback) for text-only generation." );
94- }
95- if (imageToken.empty ()) {
96- throw RnExecutorchError (
97- RnExecutorchErrorCode::InvalidUserInput,
98- " imageToken must not be empty. Pass the model's image token (e.g. "
99- " from tokenizer_config.json)." );
100- }
101-
102- const size_t kImageTokenLen = imageToken.size ();
103-
104- std::vector<llm::MultimodalInput> inputs;
105- size_t imageIdx = 0 ;
106- size_t searchPos = 0 ;
107-
108- while (true ) {
109- size_t found = prompt.find (imageToken, searchPos);
110- if (found == std::string::npos) {
111- if (searchPos < prompt.size ()) {
112- inputs.push_back (llm::make_text_input (prompt.substr (searchPos)));
113- }
114- break ;
115- }
116- // Text segment before this placeholder
117- if (found > searchPos) {
118- inputs.push_back (
119- llm::make_text_input (prompt.substr (searchPos, found - searchPos)));
120- }
121- // Image at this position
122- if (imageIdx >= imagePaths.size ()) {
123- throw RnExecutorchError (
124- RnExecutorchErrorCode::InvalidUserInput,
125- " More '" + imageToken +
126- " ' placeholders in prompt than image paths provided" );
127- }
128- inputs.push_back (llm::make_image_input (imagePaths[imageIdx++]));
129- searchPos = found + kImageTokenLen ;
130- }
131-
132- if (imageIdx < imagePaths.size ()) {
133- throw RnExecutorchError (RnExecutorchErrorCode::InvalidUserInput,
134- " More image paths provided than '" + imageToken +
135- " ' placeholders in prompt" );
136- }
137-
138- if (inputs.empty ()) {
139- throw RnExecutorchError (RnExecutorchErrorCode::InvalidUserInput,
140- " No inputs to generate from" );
141- }
142-
143- std::string output;
144- auto nativeCallback = [this , callback, &output](const std::string &token) {
145- output += token;
146- if (callback && callInvoker) {
147- callInvoker->invokeAsync ([callback, token](jsi::Runtime &runtime) {
148- callback->call (runtime, jsi::String::createFromUtf8 (runtime, token));
149- });
150- }
151- };
152-
153- auto error = runner_->generate (inputs, nativeCallback);
154- if (error != Error::Ok) {
155- throw RnExecutorchError (error, " Failed to generate multimodal response" );
156- }
157-
158- return output;
159- }
160-
161- std::string LLM::generateMultimodalWithAudio (
162- std::string prompt, std::vector<std::string> imagePaths,
163- std::string imageToken, std::vector<std::vector<float >> audioWaveforms,
164- std::string audioToken, std::shared_ptr<jsi::Function> callback) {
82+ std::string LLM::generateMultimodal (
83+ std::string prompt, std::shared_ptr<jsi::Function> callback,
84+ std::vector<std::string> imagePaths, std::string imageToken,
85+ std::vector<std::vector<float >> audioWaveforms, std::string audioToken) {
16586 if (!runner_ || !runner_->is_loaded ()) {
16687 throw RnExecutorchError (RnExecutorchErrorCode::ModuleNotLoaded,
16788 " Runner is not loaded" );
@@ -234,6 +155,7 @@ std::string LLM::generateMultimodalWithAudio(
234155 });
235156 }
236157 };
158+
237159 auto error = runner_->generate (inputs, nativeCallback);
238160 if (error != Error::Ok) {
239161 throw RnExecutorchError (error, " Failed to generate multimodal response" );
0 commit comments