
Commit a95a7c1

barhanc and msluszniak authored
fix: fix unload in LLM and ModelHostObject to properly free LLM memory (#954)
## Description

Fixes two related memory management bugs in the LLM delete flow:

- **LLM.cpp**: `LLM::unload()` was destroying the runner object but never calling `BaseModel::unload()`, so the module memory was not released. `LLM::unload()` now calls `BaseModel::unload()` after resetting the runner.
- **LLMController.ts**: When `load()` was called on an already-loaded `LLMController`, the previous native module instance was simply overwritten without calling `unload()` first, which caused a memory leak. It now unloads the existing native module before loading a new one.
- **ModelHostObject.h**: Resets the JSI external memory pressure to `0` after `model->unload()` so the JS GC is correctly informed that the native memory has been freed.

### Introduces a breaking change?

- [ ] Yes
- [x] No

### Type of change

- [x] Bug fix (change which fixes an issue)
- [ ] New feature (change which adds functionality)
- [ ] Documentation update (improves or adds clarity to existing documentation)
- [ ] Other (chores, tests, code style improvements etc.)

### Tested on

- [x] iOS
- [x] Android

### Testing instructions

Use the provided screen (you can simply replace the current LLM app screen) to reproduce the bug and verify the fix. It lets you load and unload an LLM and observe memory behavior with `vmmap` / `adb`.
- [x] Run the LLM example app
- [x] Prepare memory monitors
  - **iOS**: `xcrun simctl spawn booted launchctl list | grep llm` and `watch -n 0.1 "vmmap <pid> | tail -12"`
  - **Android**: `watch -n 0.1 "adb shell dumpsys meminfo com.anonymous.llm"`
- [x] Press Load and wait for ready
- [x] Note the baseline native memory in the profiler
- [x] Press Unload and verify that native memory returns to baseline
- [x] Press Load again without restarting and verify there is no accumulation
- [x] Repeat load/unload 5+ times and confirm there is no upward drift in native memory and no crash
- [x] Press Load a few times without pressing Unload and verify there is no memory accumulation

```typescript
import { useEffect, useRef, useState } from 'react';
import { StyleSheet, Text, TouchableOpacity, View } from 'react-native';
import { LLMModule, HAMMER2_1_1_5B_QUANTIZED } from 'react-native-executorch';

export default function LLMScreen() {
  const [status, setStatus] = useState('idle');
  const llmRef = useRef<LLMModule | null>(null);

  useEffect(() => {
    llmRef.current = new LLMModule();
    return () => {
      try {
        llmRef.current?.interrupt();
        llmRef.current?.delete();
      } catch {}
    };
  }, []);

  const handleLoad = async () => {
    setStatus('loading...');
    try {
      await llmRef.current!.load(HAMMER2_1_1_5B_QUANTIZED, (p) =>
        setStatus(`loading ${(p * 100).toFixed(0)}%`)
      );
      setStatus('ready');
    } catch (e: any) {
      setStatus(`load error: ${e?.message}`);
    }
  };

  const handleUnload = () => {
    try {
      llmRef.current?.interrupt();
      llmRef.current?.delete();
      setStatus('unloaded');
    } catch (e: any) {
      setStatus(`unload error: ${e?.message}`);
    }
  };

  const isLoading = status.startsWith('loading');
  const canLoad = !isLoading;
  const canUnload = status === 'ready';

  return (
    <View style={styles.container}>
      <Text style={styles.status}>{status}</Text>
      <TouchableOpacity
        style={[styles.button, !canLoad && styles.disabled]}
        onPress={handleLoad}
        disabled={!canLoad}
      >
        <Text style={styles.buttonText}>Load</Text>
      </TouchableOpacity>
      <TouchableOpacity
        style={[styles.button, styles.unload, !canUnload && styles.disabled]}
        onPress={handleUnload}
        disabled={!canUnload}
      >
        <Text style={styles.buttonText}>Unload</Text>
      </TouchableOpacity>
    </View>
  );
}

const styles = StyleSheet.create({
  container: { flex: 1, alignItems: 'center', justifyContent: 'center', gap: 12 },
  status: { fontSize: 16, marginBottom: 8 },
  button: { backgroundColor: '#2563eb', paddingHorizontal: 32, paddingVertical: 14, borderRadius: 8 },
  unload: { backgroundColor: '#dc2626' },
  buttonText: { color: '#fff', fontSize: 16, fontWeight: '600' },
  disabled: { opacity: 0.4 },
});
```

### Related issues

#948

### Checklist

- [x] I have performed a self-review of my code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have updated the documentation accordingly
- [x] My changes generate no new warnings

---------

Co-authored-by: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com>
1 parent 52756c7 commit a95a7c1

File tree

3 files changed: +9 additions, -1 deletion


packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h

Lines changed: 1 addition & 0 deletions

```diff
@@ -436,6 +436,7 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
   JSI_HOST_FUNCTION(unload) {
     try {
       model->unload();
+      thisValue.asObject(runtime).setExternalMemoryPressure(runtime, 0);
     } catch (const RnExecutorchError &e) {
       jsi::Object errorData(runtime);
       errorData.setProperty(runtime, "code", e.getNumericCode());
```
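JSI's external memory pressure hint tells the JS garbage collector how much off-heap memory a host object keeps alive; if the hint is never reset after `unload()`, the GC keeps accounting for memory that has already been freed. The bookkeeping can be sketched in TypeScript as follows (a minimal model, assuming a hypothetical `FakeRuntime` and `ModelHandle`, not the library's real API):

```typescript
// Illustrative model of JSI external-memory-pressure bookkeeping.
// FakeRuntime and ModelHandle are hypothetical names for this sketch.
class FakeRuntime {
  externalPressure = 0; // total bytes the GC believes native code holds
  setExternalMemoryPressure(bytes: number) {
    this.externalPressure = bytes;
  }
}

class ModelHandle {
  constructor(private runtime: FakeRuntime) {}

  load(modelBytes: number) {
    // Tell the GC this object keeps `modelBytes` of native memory alive.
    this.runtime.setExternalMemoryPressure(modelBytes);
  }

  unload() {
    // Native memory is actually freed here; the fix then resets the
    // pressure hint so the GC stops accounting for released memory.
    this.runtime.setExternalMemoryPressure(0);
  }
}

const rt = new FakeRuntime();
const handle = new ModelHandle(rt);
handle.load(1_500_000_000);
console.log(rt.externalPressure); // pressure reflects the loaded model
handle.unload();
console.log(rt.externalPressure); // 0: back to baseline after the fix
```

Without the reset, the runtime would keep triggering aggressive collections on the JS heap to relieve pressure that no longer exists.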

packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp

Lines changed: 4 additions & 1 deletion

```diff
@@ -255,6 +255,9 @@ int32_t LLM::getMaxContextLength() const {
   return runner_->get_max_context_length();
 }
 
-void LLM::unload() noexcept { runner_.reset(nullptr); }
+void LLM::unload() noexcept {
+  runner_.reset(nullptr);
+  BaseModel::unload();
+}
 
 } // namespace rnexecutorch::models::llm
```
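The LLM.cpp bug is the classic "override forgets the base-class cleanup" pattern: the override released its own `runner_` but never ran `BaseModel::unload()`, so the module memory owned by the base class stayed resident. A hedged TypeScript analogue of the before/after (class and member names here are illustrative, not the library's):

```typescript
// Illustrative analogue of the LLM::unload() fix; names are hypothetical.
class BaseModel {
  private moduleLoaded = true;
  unload() {
    this.moduleLoaded = false; // base class releases the module memory
  }
  isLoaded() {
    return this.moduleLoaded;
  }
}

class BuggyLLM extends BaseModel {
  private runner: object | null = {};
  unload() {
    this.runner = null; // releases the runner only; base memory leaks
  }
}

class FixedLLM extends BaseModel {
  private runner: object | null = {};
  unload() {
    this.runner = null;
    super.unload(); // the fix: also run the base-class cleanup
  }
}

const buggy = new BuggyLLM();
buggy.unload();
console.log(buggy.isLoaded()); // true: module memory still held (the bug)

const fixed = new FixedLLM();
fixed.unload();
console.log(fixed.isLoaded()); // false: fully released
```

In the C++ code the same idea applies, except the base call must be explicit (`BaseModel::unload()`), since C++ never dispatches to an overridden base method implicitly.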

packages/react-native-executorch/src/controllers/LLMController.ts

Lines changed: 4 additions & 0 deletions

```diff
@@ -121,6 +121,10 @@ export class LLMController {
       await ResourceFetcher.fs.readAsString(tokenizerConfigPath!)
     );
 
+    if (this.nativeModule) {
+      this.nativeModule.unload();
+    }
+
     this.nativeModule = await global.loadLLM(
       modelPath,
       tokenizerPath,
```
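The guard above prevents a second `load()` from silently dropping the only JS reference to a still-loaded native module, which would leave its native memory allocated but unreachable. A minimal sketch of the pattern (here `NativeModule`, `loadNative`, and the counter are stand-ins for illustration; the real `load()` is async, but a synchronous sketch keeps the point clear):

```typescript
// Sketch of the unload-before-overwrite guard from the LLMController fix.
// NativeModule and loadNative are hypothetical stand-ins.
interface NativeModule {
  unload(): void;
}

let liveNativeModules = 0; // native instances that have not been freed

function loadNative(): NativeModule {
  liveNativeModules++; // simulates allocating native memory
  return {
    unload() {
      liveNativeModules--; // simulates freeing it
    },
  };
}

class Controller {
  private nativeModule: NativeModule | null = null;

  load() {
    // The fix: release the previous native instance before overwriting
    // the only reference to it.
    if (this.nativeModule) {
      this.nativeModule.unload();
    }
    this.nativeModule = loadNative();
  }
}

const controller = new Controller();
controller.load();
controller.load();
controller.load();
console.log(liveNativeModules); // 1: only the current instance remains
```

Without the `if (this.nativeModule)` guard, each repeated `load()` would leave one more orphaned instance behind, matching the upward memory drift described in the testing instructions.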

0 commit comments