Skip to content

Commit 8035c93

Browse files
committed
Revert "llama : set n_outputs to 1 to avoid 0 outputs mean-pooling (ggml-org#15791)"
1 parent ca82211 commit 8035c93

1 file changed

Lines changed: 0 additions & 4 deletions

File tree

src/llama-context.cpp

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -285,9 +285,6 @@ llama_context::llama_context(
 285 285          const uint32_t n_seqs = cparams.kv_unified ? 1 : cparams.n_seq_max;
 286 286          const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch);
 287 287
 288     -        // avoid reserving graphs with zero outputs
 289     -        n_outputs = 1;
 290     -
 291 288          LLAMA_LOG_DEBUG("%s: worst-case: n_tokens = %d, n_seqs = %d, n_outputs = %d\n", __func__, n_tokens, n_seqs, n_outputs);
 292 289
 293 290          // resolve automatic Flash Attention use
// resolve automatic Flash Attention use
@@ -1371,7 +1368,6 @@ llm_graph_result * llama_context::get_gf_res_reserve() const {
 1371 1368
 1372 1369  ggml_cgraph * llama_context::graph_reserve(uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs, const llama_memory_context_i * mctx, bool split_only) {
 1373 1370      //LLAMA_LOG_DEBUG("%s: reserving a graph for ubatch with n_tokens = %4u, n_seqs = %2u, n_outputs = %4u\n", __func__, n_tokens, n_seqs, n_outputs);
 1374      -    GGML_ASSERT_CONTINUE(n_outputs >= 1);
 1375 1371
 1376 1372      if (n_tokens % n_seqs != 0) {
 1377 1373          n_tokens = ((n_tokens + (n_seqs - 1)) / n_seqs) * n_seqs; // round to next multiple of n_seqs

0 commit comments

Comments (0)