|
17 | 17 | #include <executorch/extension/llm/runner/text_llm_runner.h> |
18 | 18 | #include <executorch/extension/llm/runner/text_prefiller.h> |
19 | 19 | #include <executorch/extension/llm/runner/text_token_generator.h> |
| 20 | +#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h> |
20 | 21 | #include <executorch/runtime/core/result.h> |
21 | 22 | #include <executorch/runtime/platform/runtime.h> |
22 | 23 | #include <pytorch/tokenizers/hf_tokenizer.h> |
@@ -226,12 +227,28 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner( |
226 | 227 |
|
227 | 228 | // Create the Module |
228 | 229 | std::unique_ptr<Module> module; |
| 230 | + uint32_t max_cached_memory_size_bytes_ = 1024 * 1024 * 10; // 10MB |
229 | 231 | if (data_files.size() > 0) { |
230 | 232 | module = std::make_unique<Module>( |
231 | | - model_path, data_files, load_mode, std::move(event_tracer)); |
| 233 | + model_path, |
| 234 | + data_files, |
| 235 | + load_mode, |
| 236 | + std::move(event_tracer), |
| 237 | + nullptr, // memory allocator |
| 238 | + std::make_unique< |
| 239 | + executorch::extension::CPUCachingAllocator>( // temp memory |
| 240 | + // allocator |
| 241 | + max_cached_memory_size_bytes_)); |
232 | 242 | } else { |
233 | 243 | module = std::make_unique<Module>( |
234 | | - model_path, load_mode, std::move(event_tracer)); |
| 244 | + model_path, |
| 245 | + load_mode, |
| 246 | + std::move(event_tracer), // event tracer |
| 247 | + nullptr, // memory allocator |
| 248 | + std::make_unique< |
| 249 | + executorch::extension::CPUCachingAllocator>( // temp memory |
| 250 | + // allocator |
| 251 | + max_cached_memory_size_bytes_)); |
235 | 252 | } |
236 | 253 |
|
237 | 254 | // Get metadata from Module |
|
0 commit comments