Skip to content

Commit 0dbfa66

Browse files
forforever73 and lvyichen authored
return filter to save memory (ggml-org#24125)
Co-authored-by: lvyichen <lvyichen@stepfun.com>
1 parent e802356 commit 0dbfa66

1 file changed

Lines changed: 9 additions & 0 deletions

File tree

src/llama-model.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2112,6 +2112,15 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
2112 2112
filter = [n_main](int32_t il) { return (uint32_t)il >= n_main; };
2113 2113
}
2114 2114

2115+
if (arch == LLM_ARCH_STEP35 && hparams.nextn_predict_layers > 0) {
2116+
const uint32_t n_main = hparams.n_layer - hparams.nextn_predict_layers;
2117+
if (params.ctx_type == LLAMA_CONTEXT_TYPE_MTP) {
2118+
filter = [n_main](int32_t il) { return (uint32_t)il >= n_main; };
2119+
} else {
2120+
filter = [n_main](int32_t il) { return (uint32_t)il < n_main; };
2121+
}
2122+
}
2123+
2115 2124
if (hparams.swa_type != LLAMA_SWA_TYPE_NONE) {
2116 2125
GGML_ASSERT(hparams.is_swa_any());
2117 2126

0 commit comments

Comments (0)