@@ -497,4 +497,56 @@ TEST_F(RunnerTest, GenerateEmptyWithoutPrefillFails) {
497497 EXPECT_EQ (err, Error::InvalidState);
498498}
499499
500+ // Test that TextTokenGenerator works correctly in non-kv-cache mode.
501+ // Exercises the code path fixed by reserving capacity before from_blob:
502+ // without reserve(), vector reallocation would invalidate the data pointer.
503+ TEST_F (RunnerTest, NonKvCacheGenerateCompletesSuccessfully) {
504+ auto tokenizer = createMockTokenizer ();
505+ auto text_decoder_runner = createMockTextDecoderRunner ();
506+
507+ // In non-kv-cache mode, the input tensor should grow by 1 token each step.
508+ // Verify data is readable each time (catches dangling pointers under ASan).
509+ int step_count = 0 ;
510+ ON_CALL (*text_decoder_runner, step)
511+ .WillByDefault (
512+ [&](executorch::extension::TensorPtr& tokens_tensor, int64_t ) {
513+ // Initial tokens = 4 (prompt 1,2,3 + prefill token 4).
514+ // Each step appends one token before the next call.
515+ int64_t expected_size = 4 + step_count;
516+ EXPECT_EQ (tokens_tensor->size (1 ), expected_size);
517+
518+ // Read data to verify the pointer is still valid.
519+ auto * data = tokens_tensor->const_data_ptr <int64_t >();
520+ EXPECT_EQ (data[0 ], 1 ); // first prompt token
521+ EXPECT_EQ (data[1 ], 2 );
522+ EXPECT_EQ (data[2 ], 3 );
523+ EXPECT_EQ (data[3 ], 4 ); // prefill token
524+
525+ step_count++;
526+ return Result<executorch::aten::Tensor>(tensor);
527+ });
528+
529+ Stats stats;
530+ auto eos_ids = std::make_unique<std::unordered_set<uint64_t >>(
531+ std::unordered_set<uint64_t >{100 });
532+ TextTokenGenerator generator (
533+ tokenizer.get (),
534+ text_decoder_runner.get (),
535+ false , // use_kv_cache = false
536+ std::move (eos_ids),
537+ &stats);
538+
539+ // 4 tokens: prompt (1,2,3) + prefill token (4)
540+ std::vector<uint64_t > tokens = {1 , 2 , 3 , 4 };
541+ // Generate enough tokens that the vector would reallocate without reserve.
542+ int32_t max_new_tokens = 20 ;
543+
544+ auto result = generator.generate (
545+ tokens, 4 , max_new_tokens, 0 .0f , [](const std::string&) {});
546+
547+ EXPECT_TRUE (result.ok ());
548+ EXPECT_EQ (result.get (), max_new_tokens);
549+ EXPECT_EQ (step_count, max_new_tokens);
550+ }
551+
500552} // namespace
0 commit comments