
Commit c05c80a

Move string prefill overloads to .cpp, fix stale prefill_next_token_
Move the two string convenience prefill methods from inline in text_llm_runner.h to text_llm_runner.cpp. This removes the header's dependency on multimodal_input.h; only a forward declaration of MultimodalInput (from irunner.h) is needed in the header.

Also fix a bug where MultimodalRunner::generate(vector) left prefill_next_token_ set after consuming the token. A subsequent generate("") would incorrectly reuse the stale token. Clear it after extracting the value.

This PR was authored with the assistance of Claude.
1 parent 72d6034 commit c05c80a

3 files changed, 22 additions & 10 deletions
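For context on the bug fix, here is a minimal self-contained sketch of the caching pattern involved. `ToyRunner` is hypothetical and greatly simplified (the real logic lives in `MultimodalRunner` and returns ExecuTorch `Result`/`Error` types); it models how `prefill_next_token_` caches a token that a later `generate()` call with empty input resumes from, and why the cached value must be cleared once consumed.

```cpp
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Hypothetical ToyRunner modeling the prefill_next_token_ caching pattern.
// prefill() caches the token it produces; generate() with an empty input
// resumes from that cached token.
class ToyRunner {
 public:
  uint64_t prefill(const std::vector<std::string>& inputs) {
    // Stand-in for real prefill: the "token" is just the total input length.
    uint64_t tok = 0;
    for (const auto& s : inputs) {
      tok += s.size();
    }
    prefill_next_token_ = tok;
    return tok;
  }

  uint64_t generate(const std::vector<std::string>& inputs) {
    uint64_t cur_token = (inputs.empty() && prefill_next_token_.has_value())
        ? *prefill_next_token_  // resume from a previous prefill
        : prefill(inputs);
    prefill_next_token_.reset();  // the fix: clear after consuming the token
    return cur_token;             // stand-in for decode_from_token(...)
  }

 private:
  std::optional<uint64_t> prefill_next_token_;
};

int main() {
  ToyRunner r;
  std::cout << r.generate({"hello"}) << "\n";  // 5
  // Without the reset() above, this empty call would reuse the stale 5.
  std::cout << r.generate({}) << "\n";         // 0 once the token is cleared
}
```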


extension/llm/runner/multimodal_runner.cpp

Lines changed: 1 addition & 0 deletions
```diff
@@ -304,6 +304,7 @@ Error MultimodalRunner::generate(
   auto prefill_result = prefill(inputs, config.num_bos, config.num_eos);
   ET_CHECK_OK_OR_RETURN_ERROR(prefill_result.error());
   uint64_t cur_token = prefill_result.get();
+  prefill_next_token_.reset();
 
   return decode_from_token(cur_token, config, wrapped_callback, stats_callback);
 }
```

extension/llm/runner/text_llm_runner.cpp

Lines changed: 16 additions & 0 deletions
```diff
@@ -11,6 +11,7 @@
 // The module takes in a string as input and emits a string as output.
 
 #include <executorch/extension/llm/runner/io_manager/io_manager.h>
+#include <executorch/extension/llm/runner/multimodal_input.h>
 #include <executorch/extension/llm/runner/text_llm_runner.h>
 #include <executorch/extension/llm/runner/util.h>
 #include <executorch/runtime/platform/runtime.h>
@@ -289,6 +290,21 @@ Result<uint64_t> TextLLMRunner::prefill(
   return prefill_next_token_.value();
 }
 
+Result<uint64_t> TextLLMRunner::prefill(
+    const std::string& prompt,
+    int32_t num_bos,
+    int32_t num_eos) {
+  std::vector<MultimodalInput> inputs;
+  inputs.emplace_back(MultimodalInput(prompt));
+  return prefill(inputs, num_bos, num_eos);
+}
+
+Result<uint64_t> TextLLMRunner::prefill(
+    const std::string& prompt,
+    const GenerationConfig& config) {
+  return prefill(prompt, config.num_bos, config.num_eos);
+}
+
 Error TextLLMRunner::warmup(const std::string& prompt, int32_t max_new_tokens) {
   // Create a GenerationConfig for warmup
   GenerationConfig config;
```
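A hedged usage sketch of the two relocated overloads. The call site below is illustrative: runner construction and model loading are elided, and the `executorch::extension::llm` namespace plus `Result::ok()` are assumed from the surrounding ExecuTorch headers.

```cpp
#include <cstdint>

#include <executorch/extension/llm/runner/text_llm_runner.h>

using executorch::extension::llm::GenerationConfig;
using executorch::extension::llm::TextLLMRunner;

// Illustrative call site; assumes `runner` was built and loaded elsewhere.
void prefill_example(TextLLMRunner& runner) {
  // Preferred overload: explicit BOS/EOS token counts.
  auto result = runner.prefill("Once upon a time", /*num_bos=*/1, /*num_eos=*/0);
  if (result.ok()) {
    uint64_t next_token = result.get();
    (void)next_token;  // would seed the decode loop
  }

  // Deprecated overload: BOS/EOS counts taken from a GenerationConfig.
  GenerationConfig config;
  config.num_bos = 1;
  auto same = runner.prefill("Once upon a time", config);
  (void)same;
}
```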

extension/llm/runner/text_llm_runner.h

Lines changed: 5 additions & 10 deletions
```diff
@@ -19,7 +19,6 @@
 #include <unordered_map>
 
 #include <executorch/extension/llm/runner/irunner.h>
-#include <executorch/extension/llm/runner/multimodal_input.h>
 #include <executorch/extension/llm/runner/stats.h>
 #include <executorch/extension/llm/runner/text_decoder_runner.h>
 #include <executorch/extension/llm/runner/text_prefiller.h>
@@ -120,22 +119,18 @@ class ET_EXPERIMENTAL TextLLMRunner : public IRunner {
   /**
    * Convenience overload: prefill a single text prompt.
    */
-  ::executorch::runtime::Result<uint64_t>
-  prefill(const std::string& prompt, int32_t num_bos = 0, int32_t num_eos = 0) {
-    std::vector<MultimodalInput> inputs;
-    inputs.emplace_back(MultimodalInput(prompt));
-    return prefill(inputs, num_bos, num_eos);
-  }
+  ::executorch::runtime::Result<uint64_t> prefill(
+      const std::string& prompt,
+      int32_t num_bos = 0,
+      int32_t num_eos = 0);
 
   /**
    * Prefill a text prompt using GenerationConfig.
    * Deprecated: prefer prefill(prompt, num_bos, num_eos).
    */
   ::executorch::runtime::Result<uint64_t> prefill(
       const std::string& prompt,
-      const GenerationConfig& config) {
-    return prefill(prompt, config.num_bos, config.num_eos);
-  }
+      const GenerationConfig& config);
 
   /**
    * @brief Warms up the model with a sample prompt
```
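Why dropping multimodal_input.h from the header is safe: with the method bodies moved to the .cpp file, the header only declares these functions, and a declaration may name an incomplete type. A toy sketch of the pattern, with hypothetical names (in the real code the forward declaration of MultimodalInput comes from irunner.h):

```cpp
#include <cstdint>
#include <string>
#include <vector>

// --- toy_runner.h ---
class MultimodalInput;  // forward declaration is enough for declarations

class ToyRunner {
 public:
  // OK with an incomplete MultimodalInput: nothing is constructed here,
  // and naming the type in a parameter list does not require a definition.
  uint64_t prefill(const std::vector<MultimodalInput>& inputs);

  // No longer mentions MultimodalInput at all once the body moved out.
  uint64_t prefill(const std::string& prompt);
};

// --- toy_runner.cpp ---
// Only the translation unit that constructs MultimodalInput values needs
// its full definition, i.e. must include multimodal_input.h.
```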
