
Commit 1aac7aa

fix: update memoryLowerBound for llm
1 parent 4de1a86 commit 1aac7aa

1 file changed: 10 additions & 3 deletions

File tree

  • packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp

packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp

@@ -1,5 +1,6 @@
 #include "LLM.h"
 
+#include <cmath>
 #include <executorch/extension/tensor/tensor.h>
 #include <filesystem>
 #include <map>
@@ -42,8 +43,12 @@ LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
     throw RnExecutorchError(loadResult, "Failed to load LLM runner");
   }
 
-  memorySizeLowerBound = fs::file_size(fs::path(modelSource)) +
-                         fs::file_size(fs::path(tokenizerSource));
+  // I am purposefully not adding the file size of the model here. The
+  // reason is that Hermes would crash the app if we try to alloc too much
+  // memory here. Also, given we're using mmap, the true memory consumption
+  // of a model is not really equal to its file size. The tokenizer file's
+  // size is a hint to the GC that this object might be worth getting rid of.
+  memorySizeLowerBound = fs::file_size(fs::path(tokenizerSource));
 }
 
 std::string LLM::generate(std::string input,
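
The mmap point in the new comment is the crux: a file-backed mapping reserves address space, but physical pages are faulted in only as the weights are actually touched, and clean pages can be evicted again under memory pressure, so a model's file size overstates its true footprint. A minimal standalone sketch of that distinction, assuming a POSIX system ("model.pte" is a placeholder path, not one the library uses):

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cstdio>

int main() {
  int fd = open("model.pte", O_RDONLY); // placeholder path
  if (fd < 0) {
    perror("open");
    return 1;
  }

  struct stat st{};
  fstat(fd, &st);

  // The mapping costs address space, not resident memory: pages come in
  // lazily on first access and can be dropped again under pressure, so
  // st.st_size is an upper bound on the mapping's footprint, not an
  // allocation the process pays upfront.
  void *weights = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  if (weights == MAP_FAILED) {
    perror("mmap");
    close(fd);
    return 1;
  }
  std::printf("mapped %lld bytes at %p\n",
              static_cast<long long>(st.st_size), weights);

  munmap(weights, st.st_size);
  close(fd);
  return 0;
}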
@@ -194,7 +199,9 @@ int32_t LLM::countTextTokens(std::string text) const {
   return runner_->count_text_tokens(text);
 }
 
-size_t LLM::getMemoryLowerBound() const noexcept { return 0; }
+size_t LLM::getMemoryLowerBound() const noexcept {
+  return memorySizeLowerBound;
+}
 
 void LLM::setCountInterval(size_t countInterval) {
   if (!runner_ || !runner_->is_loaded()) {
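
As for the bound being "a hint to the GC": getMemoryLowerBound only has an effect if the host layer forwards it to the JavaScript runtime. One plausible wiring, sketched under the assumption that the instance is exposed through JSI and that the setExternalMemoryPressure API from recent React Native versions is available; the helper below is hypothetical, not this library's actual binding code:

#include <jsi/jsi.h>

#include "LLM.h" // the class from the diff above

namespace jsi = facebook::jsi;

// Hypothetical helper: once a JS wrapper object for an LLM instance
// exists, report the native lower bound so Hermes can weigh the wrapper
// during GC. Overstating this figure is exactly what the commit comment
// warns against, hence only the tokenizer size goes into it.
void reportMemoryHint(jsi::Runtime &rt, jsi::Object &wrapper,
                      const LLM &llm) {
  wrapper.setExternalMemoryPressure(rt, llm.getMemoryLowerBound());
}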
