
Commit 9f1c89f

feat(llm): min_p and repetition_penalty sampling, per-model defaults, letterbox vision (#1099)
## Description

Adds `min_p` and `repetition_penalty` sampling parameters to `GenerationConfig`, plumbs them through the full stack (`Sampler` → `TextDecoderRunner` → `TextTokenGenerator` → `BaseLLMRunner` / `TextRunner` / `MultimodalRunner` → JSI bindings → `LLMController`), introduces a per-model default `generationConfig` that gets applied automatically on load (populated for Qwen3 and LFM2-VL from their upstream recommendations), and replaces the distorting `cv::resize` in `VisionEncoder` with the existing `resizePadded` helper so multimodal inputs keep their aspect ratio.

Also fixes three silent pre-existing bugs surfaced along the way: an xorshift PRNG seeded with `0` that made sampling deterministic, a `Sampler::apply_min_p` renormalization gap, and inline `{}` no-op overrides in `MultimodalRunner` that would desync in future refactors.

### Introduces a breaking change?

- [ ] Yes
- [x] No

### Type of change

- [x] Bug fix (change which fixes an issue)
- [x] New feature (change which adds functionality)
- [ ] Documentation update (improves or adds clarity to existing documentation)
- [ ] Other (chores, tests, code style improvements etc.)

### Tested on

- [x] iOS
- [ ] Android

### Testing instructions

**Sampling parameter plumbing**

1. Open `apps/llm`, load any supported model (e.g. `LFM2_VL_450M_QUANTIZED`).
2. Without any manual `configure()` call, send a prompt. The model card defaults are applied automatically — for LFM2-VL you should now see coherent, non-repetitive descriptions (previously the model often produced generic or looping replies at the library's default `temperature=0.8, topp=0.9`).
3. Optionally override via `useLLM(...)`'s `configure({ generationConfig: { temperature: 0.7, minP: 0.1, repetitionPenalty: 1.05 } })` and confirm the generation style changes.

**Letterbox preprocessing**

1. With a multimodal model loaded in `apps/llm` → `multimodal_llm` screen, attach a photo with a non-square aspect ratio (e.g. 3000×2250 from your camera roll).
2. Ask the model to describe it. Before this PR the image was stretched into the PTE's square input shape — the model would sometimes misidentify subjects in wide/tall photos. After, the image is letterboxed so proportions are preserved.

### Screenshots

<!-- none -->

### Related issues

<!-- none -->

### Checklist

- [x] I have performed a self-review of my code
- [x] I have commented my code, particularly in hard-to-understand areas
- [x] I have updated the documentation accordingly
- [x] My changes generate no new warnings

### Additional notes

**Per-model recommended defaults**

Model presets gain an optional `generationConfig` field; `LLMController.load` applies it before flipping `isReady`, so users see sensible sampling out of the box. User `configure()` calls still override per-field. Populated for:

- **Qwen3** family (`temperature=0.6, topp=0.95`, from `generation_config.json`)
- **LFM2-VL** family (`temperature=0.1, minP=0.15, repetitionPenalty=1.05`, from the LiquidAI model card)

Other presets (Llama, SmolLM2, Hammer, Phi-4, Qwen2.5, LFM2 text) keep the library defaults — these model cards don't publish sampling recommendations, so adding arbitrary values would be guessing.

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 04852be commit 9f1c89f

29 files changed

Lines changed: 514 additions & 178 deletions
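Before the per-file diffs, a minimal TypeScript sketch of the two sampling steps this commit plumbs through (illustrative only; the real implementation is the C++ `Sampler`, and `applyMinP` / `applyRepetitionPenalty` are not library API):

```typescript
// Sketch of min-p filtering; assumes `probs` is already softmax-normalized.
function applyMinP(probs: number[], minP: number): number[] {
  if (minP <= 0) return probs; // minP = 0 disables the filter
  const maxProb = probs.reduce((a, b) => Math.max(a, b), 0);
  const threshold = minP * maxProb; // keep tokens at or above minP * max_prob
  const kept = probs.map((p) => (p >= threshold ? p : 0));
  const sum = kept.reduce((a, b) => a + b, 0);
  // Renormalize over the surviving tokens; skipping this division is a
  // plausible reading of the "renormalization gap" fixed above.
  return kept.map((p) => p / sum);
}

// Sketch of a CTRL-style repetition penalty applied to raw logits.
function applyRepetitionPenalty(
  logits: number[],
  seenTokenIds: Set<number>,
  penalty: number
): number[] {
  if (penalty === 1) return logits; // penalty = 1 disables it
  return logits.map((logit, tokenId) =>
    seenTokenIds.has(tokenId)
      ? logit > 0
        ? logit / penalty // shrink positive scores of repeated tokens
        : logit * penalty // push negative scores further down
      : logit
  );
}
```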


apps/llm/app/multimodal_llm/index.tsx

Lines changed: 2 additions & 2 deletions
```diff
@@ -14,7 +14,7 @@ import {
 import { launchImageLibrary } from 'react-native-image-picker';
 import { useIsFocused } from '@react-navigation/native';
 import { useSafeAreaInsets } from 'react-native-safe-area-context';
-import { useLLM, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
+import { useLLM, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';
 import SendIcon from '../../assets/icons/send_icon.svg';
 import PauseIcon from '../../assets/icons/pause_icon.svg';
 import ColorPalette from '../../colors';
@@ -50,7 +50,7 @@ function MultimodalLLMScreen() {
   const [error, setError] = useState<string | null>(null);
 
   const vlm = useLLM({
-    model: LFM2_VL_1_6B_QUANTIZED,
+    model: LFM2_5_VL_1_6B_QUANTIZED,
   });
   const tokenCount = vlm.isReady ? vlm.getGeneratedTokenCount() : 0;
   const { stats, onMessageSend } = useLLMStats(
```
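This screen is where the letterbox change is user-visible; the resize itself happens in the C++ `VisionEncoder` via `resizePadded`, which is not part of this excerpt. A TypeScript sketch of the implied geometry, with an assumed square input edge (the real size depends on the exported PTE):

```typescript
// Hedged sketch of letterbox geometry; the actual resizePadded helper may
// differ in rounding and padding color. `dst` is the model's square input
// edge (e.g. 512 is assumed here, not taken from the PR).
function letterbox(srcW: number, srcH: number, dst: number) {
  const scale = Math.min(dst / srcW, dst / srcH); // fit, never crop or stretch
  const newW = Math.round(srcW * scale);
  const newH = Math.round(srcH * scale);
  return {
    newW,
    newH,
    padX: Math.floor((dst - newW) / 2), // padding added left/right
    padY: Math.floor((dst - newH) / 2), // padding added top/bottom
  };
}

// The 3000×2250 test photo at dst = 512 becomes 512×384 with 64 px of
// padding top and bottom, instead of being stretched to 512×512.
```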

docs/docs/03-hooks/01-natural-language-processing/useLLM.md

Lines changed: 16 additions & 6 deletions
````diff
@@ -211,7 +211,15 @@ To configure model (i.e. change system prompt, load initial conversation history
 
 - [`temperature`](../../06-api-reference/interfaces/GenerationConfig.md#temperature) - Scales output logits by the inverse of temperature. Controls the randomness / creativity of text generation.
 
-- [`topp`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topp.
+- [`topP`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topP. Range `[0, 1]`. Values of `0` or `1` disable top-p filtering.
+
+- [`minP`](../../06-api-reference/interfaces/GenerationConfig.md#minp) - Minimum-probability threshold applied after softmax: tokens whose probability is below `minP * max_prob` are excluded from sampling. Range `[0, 1]`. Default `0` disables the filter. Stacks with `topP` when both are set.
+
+- [`repetitionPenalty`](../../06-api-reference/interfaces/GenerationConfig.md#repetitionpenalty) - Multiplicative penalty applied to logits of tokens that already appeared in the prompt or the generated text. Values greater than `1` discourage repetition; default `1` disables the penalty.
+
+:::info[Built-in models ship with sampling defaults]
+Model presets expose an optional [`generationConfig`](../../06-api-reference/interfaces/LLMProps.md) on the `model` prop. Whenever the upstream model card publishes recommended values (currently Qwen3 and LFM2-VL) the preset carries them and `useLLM` applies them automatically before `isReady` flips — you don't need to call `configure` just to get sensible defaults. Any fields you then pass to `configure` still override on a per-field basis.
+:::
 
 ### Model configuration example
 
@@ -282,7 +290,9 @@ useEffect(() => {
       outputTokenBatchSize: 15,
       batchTimeInterval: 100,
       temperature: 0.7,
-      topp: 0.9,
+      topP: 0.9,
+      minP: 0.05,
+      repetitionPenalty: 1.05,
     },
   });
 }, [configure]);
@@ -491,9 +501,9 @@ Some models support multimodal input — text and images together. To use them,
 ### Loading a VLM
 
 ```tsx
-import { useLLM, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
+import { useLLM, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';
 
-const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
+const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });
 ```
 
 The `capabilities` field is already set on the model constant. You can also construct the model object explicitly:
@@ -514,7 +524,7 @@ Passing `capabilities` unlocks the typed `media` argument on `sendMessage`.
 ### Sending a message with an image
 
 ```tsx
-const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
+const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });
 
 const send = () => {
   llm.sendMessage('What is in this image?', {
@@ -537,7 +547,7 @@ The `imagePath` should be a local file path on the device.
 You can also use `generate` directly by setting `mediaPath` on user messages:
 
 ```tsx
-const llm = useLLM({ model: LFM2_VL_1_6B_QUANTIZED });
+const llm = useLLM({ model: LFM2_5_VL_1_6B_QUANTIZED });
 
 const handleGenerate = async () => {
   const chat: Message[] = [
````
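The info box added above describes a per-field override chain. A minimal sketch of that merge, assuming plain object-spread semantics (the actual `LLMController` code is not shown in this excerpt, and the object names are illustrative):

```typescript
// Later spreads win per field: library defaults < preset recommendations
// < explicit configure() values.
const effectiveGenerationConfig = {
  ...{ temperature: 0.8, topP: 0.9 }, // library defaults, per the PR notes
  ...modelPreset.generationConfig,    // e.g. Qwen3 or LFM2-VL recommendations
  ...userConfig.generationConfig,     // configure() overrides win last
};
```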

docs/docs/04-typescript-api/01-natural-language-processing/LLMModule.md

Lines changed: 11 additions & 3 deletions
````diff
@@ -107,17 +107,25 @@ To configure model (i.e. change system prompt, load initial conversation history
 
 - [`temperature`](../../06-api-reference/interfaces/GenerationConfig.md#temperature) - Scales output logits by the inverse of temperature. Controls the randomness / creativity of text generation.
 
-- [`topp`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topp.
+- [`topP`](../../06-api-reference/interfaces/GenerationConfig.md#topp) - Only samples from the smallest set of tokens whose cumulative probability exceeds topP. Range `[0, 1]`. Values of `0` or `1` disable top-p filtering.
+
+- [`minP`](../../06-api-reference/interfaces/GenerationConfig.md#minp) - Minimum-probability threshold applied after softmax: tokens whose probability is below `minP * max_prob` are excluded from sampling. Range `[0, 1]`. Default `0` disables the filter. Stacks with `topP` when both are set.
+
+- [`repetitionPenalty`](../../06-api-reference/interfaces/GenerationConfig.md#repetitionpenalty) - Multiplicative penalty applied to logits of tokens that already appeared in the prompt or the generated text. Values greater than `1` discourage repetition; default `1` disables the penalty.
+
+:::info[Built-in models ship with sampling defaults]
+Model presets expose an optional `generationConfig` that `LLMModule.fromModelName` applies automatically when available — for Qwen3 and LFM2-VL this means the model-card recommended sampling settings are in effect without any explicit `configure` call. Any fields you pass to `configure` still override on a per-field basis.
+:::
 
 ## Vision-Language Models (VLM)
 
 Some models support multimodal input — text and images together. To use them, pass `capabilities` in the model object when calling [`fromModelName`](../../06-api-reference/classes/LLMModule.md#frommodelname):
 
 ```typescript
-import { LLMModule, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
+import { LLMModule, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';
 
 const llm = await LLMModule.fromModelName(
-  LFM2_VL_1_6B_QUANTIZED,
+  LFM2_5_VL_1_6B_QUANTIZED,
   undefined,
   (token) => console.log(token)
 );
````
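To make the interplay concrete, a usage sketch: preset defaults apply during `fromModelName`, and a later `configure` call overrides only the fields it names. The `configure` shape here is assumed to mirror the hook's documented one and may differ on `LLMModule`:

```typescript
import { LLMModule, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';

// Loads with the LFM2-VL preset defaults described in this commit:
// temperature=0.1, minP=0.15, repetitionPenalty=1.05.
const llm = await LLMModule.fromModelName(
  LFM2_5_VL_1_6B_QUANTIZED,
  undefined,
  (token) => console.log(token)
);

// Override a single field; the other preset values stay in effect.
llm.configure({ generationConfig: { minP: 0.1 } });
```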

packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h

Lines changed: 9 additions & 0 deletions
```diff
@@ -140,6 +140,15 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
                                      synchronousHostFunction<&Model::setTopp>,
                                      "setTopp"));
 
+    addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
+                                     synchronousHostFunction<&Model::setMinP>,
+                                     "setMinP"));
+
+    addFunctions(JSI_EXPORT_FUNCTION(
+        ModelHostObject<Model>,
+        synchronousHostFunction<&Model::setRepetitionPenalty>,
+        "setRepetitionPenalty"));
+
     addFunctions(JSI_EXPORT_FUNCTION(
         ModelHostObject<Model>,
         synchronousHostFunction<&Model::getMaxContextLength>,
```
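For reference, the surface these bindings expose to JS. A sketch only; `nativeModel` stands in for the JSI host object of a loaded model, and the real wiring lives in `LLMController`:

```typescript
// Both are synchronous host functions; per LLM.cpp below they throw if the
// model isn't loaded, if minP is outside [0, 1], or if the penalty is negative.
declare const nativeModel: {
  setMinP(minP: number): void;
  setRepetitionPenalty(repetitionPenalty: number): void;
};

nativeModel.setMinP(0.15);
nativeModel.setRepetitionPenalty(1.05);
```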

packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp

Lines changed: 24 additions & 0 deletions
```diff
@@ -250,6 +250,30 @@ void LLM::setTopp(float topp) {
   runner_->set_topp(topp);
 }
 
+void LLM::setMinP(float minP) {
+  if (!runner_ || !runner_->is_loaded()) {
+    throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
+                            "Can't configure a model that's not loaded");
+  }
+  if (minP < 0.0f || minP > 1.0f) {
+    throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
+                            "Min-p must be between 0.0 and 1.0");
+  }
+  runner_->set_min_p(minP);
+}
+
+void LLM::setRepetitionPenalty(float repetitionPenalty) {
+  if (!runner_ || !runner_->is_loaded()) {
+    throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
+                            "Can't configure a model that's not loaded");
+  }
+  if (repetitionPenalty < 0.0f) {
+    throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
+                            "Repetition penalty must be non-negative");
+  }
+  runner_->set_repetition_penalty(repetitionPenalty);
+}
+
 int32_t LLM::getMaxContextLength() const {
   if (!runner_ || !runner_->is_loaded()) {
     throw RnExecutorchError(
```

packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h

Lines changed: 2 additions & 0 deletions
```diff
@@ -38,6 +38,8 @@ class LLM : public BaseModel {
   void setCountInterval(size_t countInterval);
   void setTemperature(float temperature);
   void setTopp(float topp);
+  void setMinP(float minP);
+  void setRepetitionPenalty(float repetitionPenalty);
   void setTimeInterval(size_t timeInterval);
   int32_t getMaxContextLength() const;
```

packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
```diff
@@ -151,6 +151,12 @@ add_rn_test(RunnerTests unit/RunnerTest.cpp
   integration/stubs/jsi_stubs.cpp
   LIBS tokenizers_deps
 )
+add_rn_test(SamplerTests unit/SamplerTest.cpp
+  SOURCES
+    ${COMMON_DIR}/runner/sampler.cpp
+    ${COMMON_DIR}/runner/arange_util.cpp
+  LIBS
+)
 add_rn_test(LogTests unit/LogTest.cpp)
 add_rn_test(FileUtilsTest unit/FileUtilsTest.cpp)
 add_rn_test(ImageProcessingTest unit/ImageProcessingTest.cpp
```

packages/react-native-executorch/common/rnexecutorch/tests/integration/LLMTest.cpp

Lines changed: 25 additions & 0 deletions
```diff
@@ -110,6 +110,31 @@ TEST_F(LLMTest, SetToppInvalidThrows) {
   EXPECT_THROW(model.setTopp(1.1f), RnExecutorchError);
 }
 
+TEST_F(LLMTest, SetMinP) {
+  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
+  EXPECT_NO_THROW(model.setMinP(0.0f));
+  EXPECT_NO_THROW(model.setMinP(0.15f));
+  EXPECT_NO_THROW(model.setMinP(1.0f));
+}
+
+TEST_F(LLMTest, SetMinPInvalidThrows) {
+  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
+  EXPECT_THROW(model.setMinP(-0.1f), RnExecutorchError);
+  EXPECT_THROW(model.setMinP(1.1f), RnExecutorchError);
+}
+
+TEST_F(LLMTest, SetRepetitionPenalty) {
+  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
+  EXPECT_NO_THROW(model.setRepetitionPenalty(1.0f));
+  EXPECT_NO_THROW(model.setRepetitionPenalty(1.05f));
+  EXPECT_NO_THROW(model.setRepetitionPenalty(2.0f));
+}
+
+TEST_F(LLMTest, SetRepetitionPenaltyInvalidThrows) {
+  LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
+  EXPECT_THROW(model.setRepetitionPenalty(-0.1f), RnExecutorchError);
+}
+
 TEST_F(LLMTest, SetCountInterval) {
   LLM model(kValidModelPath, kValidTokenizerPath, {}, mockInvoker_);
   EXPECT_NO_THROW(model.setCountInterval(5));
```

packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/StubRunner.h

Lines changed: 0 additions & 5 deletions
```diff
@@ -18,16 +18,11 @@ class StubRunner : public ::executorch::extension::llm::BaseLLMRunner {
     return ::executorch::runtime::Error::Ok;
   }
   void stop_impl() override {}
-  void set_temperature_impl(float t) override { last_temp_ = t; }
-  void set_topp_impl(float) override {}
-  void set_count_interval_impl(size_t) override {}
-  void set_time_interval_impl(size_t) override {}
 
   int32_t resolve_max(int32_t prompt, int32_t seq_len, int32_t ctx_len,
                       int32_t max_new = -1) const {
     return resolve_max_new_tokens(prompt, seq_len, ctx_len, max_new);
   }
 
   bool loaded_ = false;
-  float last_temp_ = -1.f;
 };
```

packages/react-native-executorch/common/rnexecutorch/tests/unit/RunnerTest.cpp

Lines changed: 13 additions & 2 deletions
```diff
@@ -62,11 +62,10 @@ TEST(MultimodalInputTest, EmptyStringIsStillText) {
 // BaseLLMRunner via StubRunner
 // ============================================================================
 
-TEST(BaseLLMRunnerTest, SetTemperatureUpdatesConfigAndCallsImpl) {
+TEST(BaseLLMRunnerTest, SetTemperatureUpdatesConfig) {
   StubRunner runner(nullptr, "dummy");
   runner.set_temperature(0.42f);
   EXPECT_FLOAT_EQ(runner.config_.temperature, 0.42f);
-  EXPECT_FLOAT_EQ(runner.last_temp_, 0.42f);
 }
 
 TEST(BaseLLMRunnerTest, SetToppUpdatesConfig) {
@@ -89,3 +88,15 @@ TEST(BaseLLMRunnerTest, GenerateEmptyStringReturnsError) {
   auto err = runner.generate("", {}, {}, {});
   EXPECT_NE(err, ::executorch::runtime::Error::Ok);
 }
+
+TEST(BaseLLMRunnerTest, SetMinPUpdatesConfig) {
+  StubRunner runner(nullptr, "dummy");
+  runner.set_min_p(0.15f);
+  EXPECT_FLOAT_EQ(runner.config_.min_p, 0.15f);
+}
+
+TEST(BaseLLMRunnerTest, SetRepetitionPenaltyUpdatesConfig) {
+  StubRunner runner(nullptr, "dummy");
+  runner.set_repetition_penalty(1.05f);
+  EXPECT_FLOAT_EQ(runner.config_.repetition_penalty, 1.05f);
+}
```
