Skip to content

Commit d99045f

Browse files
cchuter and claude committed
llama: add DeepSeek V4 Flash model + architecture (CPU)
Adds the DeepSeek-V4-Flash model: graph (src/models/deepseek4.cpp), arch / hparams / model-loader wiring, the dsv4_* compressed-KV extension to llama_memory_hybrid_iswa, GGUF conversion (conversion/deepseek.py + constants/writer keys), and the V4 chat template. Uses the standard build_attn_mha attention path; DeepSeek Sparse Attention is not used. Exercises the DSV4 ops from the preceding commit so they are testable end-to-end on CPU.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 9811b19 commit d99045f

27 files changed

Lines changed: 3596 additions & 63 deletions

common/chat.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1661,6 +1661,7 @@ static common_chat_params common_chat_params_init_gigachat_v3(
16611661
static common_chat_params common_chat_params_init_deepseek_v3_2(const common_chat_template & tmpl,
16621662
const autoparser::generation_params & inputs) {
16631663
common_chat_params data;
1664+
const auto & src = tmpl.source();
16641665

16651666
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
16661667
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
@@ -1681,8 +1682,9 @@ static common_chat_params common_chat_params_init_deepseek_v3_2(const common_cha
16811682
const std::string DSML = "|DSML|";
16821683
const std::string THINK_START = "<think>";
16831684
const std::string THINK_END = "</think>";
1684-
const std::string FC_START = "<" + DSML + "function_calls>";
1685-
const std::string FC_END = "</" + DSML + "function_calls>";
1685+
const std::string FC_NAME = src.find("function_calls") != std::string::npos ? "function_calls" : "tool_calls";
1686+
const std::string FC_START = "<" + DSML + FC_NAME + ">";
1687+
const std::string FC_END = "</" + DSML + FC_NAME + ">";
16861688
const std::string INVOKE_START = "<" + DSML + "invoke";
16871689
const std::string INVOKE_END = "</" + DSML + "invoke>";
16881690
const std::string PARAM_START = "<" + DSML + "parameter";
@@ -2093,12 +2095,12 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
20932095
return common_chat_params_init_gigachat_v3(tmpl, params);
20942096
}
20952097

2096-
// DeepSeek V3.2 format detection: template defines dsml_token and uses it for tool calls.
2098+
// DeepSeek DSML format detection: template defines dsml_token and uses it for tool calls.
20972099
// The template source contains the token as a variable assignment, not as a literal in markup.
20982100
if (src.find("dsml_token") != std::string::npos &&
2099-
src.find("function_calls") != std::string::npos &&
2101+
(src.find("function_calls") != std::string::npos || src.find("tool_calls") != std::string::npos) &&
21002102
src.find("DSML") != std::string::npos) {
2101-
LOG_DBG("Using specialized template: DeepSeek V3.2\n");
2103+
LOG_DBG("Using specialized template: DeepSeek DSML\n");
21022104
return common_chat_params_init_deepseek_v3_2(tmpl, params);
21032105
}
21042106

conversion/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@
4747
"DeepseekForCausalLM": "deepseek",
4848
"DeepseekV2ForCausalLM": "deepseek",
4949
"DeepseekV3ForCausalLM": "deepseek",
50+
"DeepseekV4ForCausalLM": "deepseek",
5051
"DistilBertForMaskedLM": "bert",
5152
"DistilBertForSequenceClassification": "bert",
5253
"DistilBertModel": "bert",

0 commit comments

Comments
 (0)