Skip to content

Commit 21d9b04

Browse files
pweglik, jakmro, chmjkb
authored
feat: Support tokenizer config and add tool support.
## Description <!-- Provide a concise and descriptive summary of the changes implemented in this PR. --> ### Type of change - [ ] Bug fix (non-breaking change which fixes an issue) - [x] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Documentation update (improves or adds clarity to existing documentation) ### Tested on - [x] iOS - [x] Android ### Testing instructions <!-- Provide step-by-step instructions on how to test your changes. Include setup details if necessary. --> ### Screenshots <!-- Add screenshots here, if applicable --> ### Related issues <!-- Link related issues here using #issue-number --> ### Checklist - [x] I have performed a self-review of my code - [x] I have commented my code, particularly in hard-to-understand areas - [x] I have updated the documentation accordingly - [x] My changes generate no new warnings ### Additional notes <!-- Include any additional information, assumptions, or context that reviewers might need to understand this PR. --> --------- Co-authored-by: Jakub Mroz <115979017+jakmro@users.noreply.github.com> Co-authored-by: Jakub Chmura <92989966+chmjkb@users.noreply.github.com>
1 parent 61fd540 commit 21d9b04

31 files changed

Lines changed: 843 additions & 669 deletions

File tree

README.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,17 @@ Add this to your component file:
4343

4444
```tsx
4545
import {
46-
LLAMA3_2_3B_QLORA,
47-
LLAMA3_2_3B_TOKENIZER,
4846
useLLM,
47+
LLAMA3_2_1B,
48+
LLAMA3_2_TOKENIZER_CONFIG,
4949
} from 'react-native-executorch';
5050

5151
function MyComponent() {
5252
// Initialize the model 🚀
5353
const llama = useLLM({
54-
modelSource: LLAMA3_2_3B_QLORA,
55-
tokenizerSource: LLAMA3_2_3B_TOKENIZER,
54+
modelSource: LLAMA3_2_1B,
55+
tokenizerSource: LLAMA3_2_TOKENIZER,
56+
tokenizerConfigSource: LLAMA3_2_TOKENIZER_CONFIG,
5657
});
5758
// ... rest of your component
5859
}
@@ -67,8 +68,8 @@ const handleGenerate = async () => {
6768
const prompt = 'The meaning of life is';
6869

6970
// Generate text based on your desired prompt
70-
const response = await llama.generate(prompt);
71-
console.log('Llama says:', response);
71+
await llama.runInference(prompt);
72+
console.log('Llama says:', llama.response);
7273
};
7374
```
7475

android/src/main/java/com/swmansion/rnexecutorch/LLM.kt

Lines changed: 9 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,13 @@ package com.swmansion.rnexecutorch
33
import android.util.Log
44
import com.facebook.react.bridge.Promise
55
import com.facebook.react.bridge.ReactApplicationContext
6-
import com.facebook.react.bridge.ReadableArray
7-
import com.swmansion.rnexecutorch.utils.ArrayUtils
8-
import com.swmansion.rnexecutorch.utils.llms.ChatRole
9-
import com.swmansion.rnexecutorch.utils.llms.ConversationManager
10-
import com.swmansion.rnexecutorch.utils.llms.END_OF_TEXT_TOKEN
116
import org.pytorch.executorch.extension.llm.LlmCallback
127
import org.pytorch.executorch.extension.llm.LlmModule
138

149
class LLM(
1510
reactContext: ReactApplicationContext,
16-
) : NativeLLMSpec(reactContext),
17-
LlmCallback {
18-
private var llamaModule: LlmModule? = null
19-
private var tempLlamaResponse = StringBuilder()
20-
private lateinit var conversationManager: ConversationManager
11+
) : NativeLLMSpec(reactContext), LlmCallback {
12+
private var llmModule: LlmModule? = null
2113

2214
override fun getName(): String = NAME
2315

@@ -27,7 +19,6 @@ class LLM(
2719

2820
override fun onResult(result: String) {
2921
emitOnToken(result)
30-
this.tempLlamaResponse.append(result)
3122
}
3223

3324
override fun onStats(tps: Float) {
@@ -37,20 +28,10 @@ class LLM(
3728
override fun loadLLM(
3829
modelSource: String,
3930
tokenizerSource: String,
40-
systemPrompt: String,
41-
messageHistory: ReadableArray,
42-
contextWindowLength: Double,
4331
promise: Promise,
4432
) {
4533
try {
46-
this.conversationManager =
47-
ConversationManager(
48-
contextWindowLength.toInt(),
49-
systemPrompt,
50-
ArrayUtils.createMapArray<String>(messageHistory),
51-
)
52-
llamaModule = LlmModule(modelSource, tokenizerSource, 0.7f)
53-
this.tempLlamaResponse.clear()
34+
llmModule = LlmModule(modelSource, tokenizerSource, 0.7f)
5435
promise.resolve("Model loaded successfully")
5536
} catch (e: Exception) {
5637
promise.reject("Model loading failed", e.message)
@@ -61,35 +42,19 @@ class LLM(
6142
input: String,
6243
promise: Promise,
6344
) {
64-
this.conversationManager.addResponse(input, ChatRole.USER)
65-
val conversation = this.conversationManager.getConversation()
66-
6745
Thread {
68-
llamaModule!!.generate(conversation, this)
69-
70-
// When we call .interrupt(), the LLM doesn't produce EOT token, that also could happen when the
71-
// generated sequence length is larger than specified in the JNI callback, hence we check if EOT
72-
// is there and if not, we append it to the output and emit the EOT token to the JS side.
73-
if (!this.tempLlamaResponse.endsWith(END_OF_TEXT_TOKEN)) {
74-
this.onResult(END_OF_TEXT_TOKEN)
75-
}
76-
77-
// We want to add the LLM response to the conversation once all the tokens are generated.
78-
// Each token is appended to the tempLlamaResponse StringBuilder in onResult callback.
79-
this.conversationManager.addResponse(this.tempLlamaResponse.toString(), ChatRole.ASSISTANT)
80-
this.tempLlamaResponse.clear()
81-
Log.d("ExecutorchLib", this.conversationManager.getConversation())
82-
}.start()
83-
84-
promise.resolve("Inference completed successfully")
46+
llmModule!!.generate(input, this)
47+
promise.resolve("Inference completed successfully")
48+
}
49+
.start()
8550
}
8651

8752
override fun interrupt() {
88-
llamaModule!!.stop()
53+
llmModule!!.stop()
8954
}
9055

9156
override fun deleteModule() {
92-
llamaModule = null
57+
llmModule = null
9358
}
9459

9560
companion object {

android/src/main/java/com/swmansion/rnexecutorch/utils/llms/ConversationManager.kt

Lines changed: 0 additions & 68 deletions
This file was deleted.

0 commit comments

Comments (0)