Skip to content

Commit 5a2bbea

Browse files
committed
s/deepseek-v4-flash/deepseek4/g continued
1 parent 450cfe2 commit 5a2bbea

7 files changed

Lines changed: 32 additions & 32 deletions

File tree

conversion/deepseek.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -464,8 +464,8 @@ def set_gguf_parameters(self):
464464

465465

466466
@ModelBase.register("DeepseekV4ForCausalLM")
467-
class DeepseekV4FlashModel(TextModel):
468-
model_arch = gguf.MODEL_ARCH.DEEPSEEK_V4_FLASH
467+
class DeepseekV4Model(TextModel):
468+
model_arch = gguf.MODEL_ARCH.DEEPSEEK4
469469
_skipped_mtp_tensors = 0
470470

471471
def __init__(self, *args, **kwargs):

gguf-py/gguf/constants.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -456,7 +456,7 @@ class MODEL_ARCH(IntEnum):
456456
DEEPSEEK2 = auto()
457457
DEEPSEEK2OCR = auto()
458458
DEEPSEEK32 = auto()
459-
DEEPSEEK_V4_FLASH = auto()
459+
DEEPSEEK4 = auto()
460460
CHATGLM = auto()
461461
GLM4 = auto()
462462
GLM4_MOE = auto()
@@ -3033,7 +3033,7 @@ class MODEL_TENSOR(IntEnum):
30333033
MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD,
30343034
MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM,
30353035
],
3036-
MODEL_ARCH.DEEPSEEK_V4_FLASH: [
3036+
MODEL_ARCH.DEEPSEEK4: [
30373037
MODEL_TENSOR.TOKEN_EMBD,
30383038
MODEL_TENSOR.OUTPUT_NORM,
30393039
MODEL_TENSOR.OUTPUT,

src/llama-hparams.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,7 @@ struct llama_hparams {
219219
uint32_t indexer_head_size = 0;
220220
uint32_t indexer_top_k = 0;
221221

222-
// DeepSeek-V4 Flash
222+
// DeepSeek-V4
223223
uint32_t dsv4_o_group_count = 0;
224224
uint32_t dsv4_o_lora_rank = 0;
225225
uint32_t dsv4_hc_mult = 0;

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -179,7 +179,7 @@ static llama_model * llama_model_mapping(llm_arch arch, const llama_model_params
179179
case LLM_ARCH_DEEPSEEK32:
180180
return new llama_model_deepseek32(params);
181181
case LLM_ARCH_DEEPSEEK4:
182-
return new llama_model_deepseek_v4_flash(params);
182+
return new llama_model_deepseek4(params);
183183
case LLM_ARCH_GLM_DSA:
184184
return new llama_model_glm_dsa(params);
185185
case LLM_ARCH_MISTRAL4:

src/llama-model.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -465,7 +465,7 @@ struct llama_layer {
465465
// openai-moe
466466
struct ggml_tensor * attn_sinks = nullptr;
467467

468-
// DeepSeek-V4 Flash
468+
// DeepSeek-V4
469469
struct ggml_tensor * attn_kv_norm = nullptr;
470470
struct ggml_tensor * hc_attn_fn = nullptr;
471471
struct ggml_tensor * hc_attn_base = nullptr;
@@ -568,7 +568,7 @@ struct llama_model {
568568
struct ggml_tensor * output_s = nullptr;
569569
struct ggml_tensor * output_in_s = nullptr;
570570

571-
// DeepSeek-V4 Flash
571+
// DeepSeek-V4
572572
struct ggml_tensor * hc_head_fn = nullptr;
573573
struct ggml_tensor * hc_head_base = nullptr;
574574
struct ggml_tensor * hc_head_scale = nullptr;

src/models/deepseek-v4.cpp

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
#include <string>
99

1010
static std::string dsv4_kv(const char * suffix) {
11-
return std::string("deepseek4") + suffix;
11+
return std::string("deepseek4.") + suffix;
1212
}
1313

1414
static float dsv4_rope_attn_factor(float freq_scale, float ext_factor) {
@@ -19,7 +19,7 @@ static float dsv4_rope_attn_factor(float freq_scale, float ext_factor) {
1919
return 1.0f / (1.0f + 0.1f*logf(1.0f/freq_scale));
2020
}
2121

22-
void llama_model_deepseek_v4_flash::load_arch_hparams(llama_model_loader & ml) {
22+
void llama_model_deepseek4::load_arch_hparams(llama_model_loader & ml) {
2323
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
2424
ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
2525
ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
@@ -46,7 +46,7 @@ void llama_model_deepseek_v4_flash::load_arch_hparams(llama_model_loader & ml) {
4646
uint32_t n_compress_ratios = 0;
4747
ml.get_arr_n(dsv4_kv("attention.compress_ratios"), n_compress_ratios);
4848
if (n_compress_ratios < hparams.n_layer) {
49-
throw std::runtime_error("DeepSeek-V4 Flash compress_ratios is shorter than block_count");
49+
throw std::runtime_error("DeepSeek-V4 compress_ratios is shorter than block_count");
5050
}
5151
ml.get_arr(dsv4_kv("attention.compress_ratios"), hparams.dsv4_compress_ratios);
5252

@@ -55,10 +55,10 @@ void llama_model_deepseek_v4_flash::load_arch_hparams(llama_model_loader & ml) {
5555
ml.get_key(dsv4_kv("moe.score_func"), score_func);
5656
ml.get_key(dsv4_kv("moe.topk_method"), topk_method);
5757
if (score_func != "sqrtsoftplus") {
58-
throw std::runtime_error("DeepSeek-V4 Flash loader currently expects sqrtsoftplus MoE scoring");
58+
throw std::runtime_error("DeepSeek-V4 loader currently expects sqrtsoftplus MoE scoring");
5959
}
6060
if (topk_method != "noaux_tc") {
61-
throw std::runtime_error("DeepSeek-V4 Flash loader currently expects noaux_tc MoE top-k");
61+
throw std::runtime_error("DeepSeek-V4 loader currently expects noaux_tc MoE top-k");
6262
}
6363

6464
hparams.swa_type = LLAMA_SWA_TYPE_STANDARD;
@@ -70,7 +70,7 @@ void llama_model_deepseek_v4_flash::load_arch_hparams(llama_model_loader & ml) {
7070
}
7171
}
7272

73-
void llama_model_deepseek_v4_flash::load_arch_tensors(llama_model_loader &) {
73+
void llama_model_deepseek4::load_arch_tensors(llama_model_loader &) {
7474
LLAMA_LOAD_LOCALS;
7575

7676
const int64_t q_lora_rank = hparams.n_lora_q;
@@ -133,7 +133,7 @@ void llama_model_deepseek_v4_flash::load_arch_tensors(llama_model_loader &) {
133133
layer.indexer_comp_ape = create_tensor(tn(LLM_TENSOR_INDEXER_COMPRESSOR_APE, nullptr, i), {2 * n_embd_indexer, ratio}, 0);
134134
layer.indexer_comp_norm = create_tensor(tn(LLM_TENSOR_INDEXER_COMPRESSOR_NORM, "weight", i), {n_embd_indexer}, 0);
135135
} else if (ratio != 128) {
136-
throw std::runtime_error("DeepSeek-V4 Flash loader only supports compression ratios 0, 4, and 128");
136+
throw std::runtime_error("DeepSeek-V4 loader only supports compression ratios 0, 4, and 128");
137137
}
138138
}
139139

@@ -155,7 +155,7 @@ void llama_model_deepseek_v4_flash::load_arch_tensors(llama_model_loader &) {
155155
}
156156
}
157157

158-
std::unique_ptr<llm_graph_context> llama_model_deepseek_v4_flash::build_arch_graph(const llm_graph_params & params) const {
158+
std::unique_ptr<llm_graph_context> llama_model_deepseek4::build_arch_graph(const llm_graph_params & params) const {
159159
return std::make_unique<graph>(*this, params);
160160
}
161161

@@ -207,7 +207,7 @@ static ggml_tensor * dsv4_hc_affine(
207207
return x;
208208
}
209209

210-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hc_weighted_sum(
210+
ggml_tensor * llama_model_deepseek4::graph::build_hc_weighted_sum(
211211
ggml_tensor * x,
212212
ggml_tensor * weights) const {
213213
const int64_t hc = hparams.dsv4_hc_mult;
@@ -225,7 +225,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hc_weighted_sum(
225225
return acc;
226226
}
227227

228-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hc_sinkhorn(
228+
ggml_tensor * llama_model_deepseek4::graph::build_hc_sinkhorn(
229229
ggml_tensor * comb,
230230
int il) const {
231231
GGML_UNUSED(il);
@@ -262,7 +262,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hc_sinkhorn(
262262
return comb;
263263
}
264264

265-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hc_pre(
265+
ggml_tensor * llama_model_deepseek4::graph::build_hc_pre(
266266
ggml_tensor * x,
267267
ggml_tensor * hc_fn,
268268
ggml_tensor * hc_scale,
@@ -314,7 +314,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hc_pre(
314314
return build_hc_weighted_sum(x, pre);
315315
}
316316

317-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hc_post(
317+
ggml_tensor * llama_model_deepseek4::graph::build_hc_post(
318318
ggml_tensor * x,
319319
ggml_tensor * residual,
320320
ggml_tensor * post,
@@ -343,7 +343,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hc_post(
343343
return out;
344344
}
345345

346-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hc_head(
346+
ggml_tensor * llama_model_deepseek4::graph::build_hc_head(
347347
ggml_tensor * x,
348348
ggml_tensor * hc_fn,
349349
ggml_tensor * hc_scale,
@@ -367,7 +367,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hc_head(
367367
return build_hc_weighted_sum(x, pre);
368368
}
369369

370-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hca_compressed_kv_from_state(
370+
ggml_tensor * llama_model_deepseek4::graph::build_hca_compressed_kv_from_state(
371371
ggml_tensor * kv_state,
372372
ggml_tensor * score_state,
373373
ggml_tensor * state_read_idxs,
@@ -425,7 +425,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hca_compressed_kv_from
425425
return comp;
426426
}
427427

428-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_overlap_compressed_kv_from_state(
428+
ggml_tensor * llama_model_deepseek4::graph::build_overlap_compressed_kv_from_state(
429429
ggml_tensor * kv_state,
430430
ggml_tensor * score_state,
431431
ggml_tensor * state_read_idxs,
@@ -507,7 +507,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_overlap_compressed_kv_
507507
return comp;
508508
}
509509

510-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_lid_top_k(
510+
ggml_tensor * llama_model_deepseek4::graph::build_lid_top_k(
511511
const llama_model & model,
512512
llm_graph_input_dsv4 * inp_dsv4,
513513
ggml_tensor * qr,
@@ -597,7 +597,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_lid_top_k(
597597
return top_k;
598598
}
599599

600-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_top_k_mask(
600+
ggml_tensor * llama_model_deepseek4::graph::build_top_k_mask(
601601
ggml_tensor * kq_mask,
602602
ggml_tensor * top_k,
603603
const char * name,
@@ -626,7 +626,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_top_k_mask(
626626
return kq_mask_top_k;
627627
}
628628

629-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_csa_lid_attention(
629+
ggml_tensor * llama_model_deepseek4::graph::build_csa_lid_attention(
630630
const llama_model & model,
631631
llm_graph_input_dsv4 * inp_dsv4,
632632
llm_graph_input_attn_kv_iswa * inp_attn,
@@ -689,7 +689,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_csa_lid_attention(
689689
return out;
690690
}
691691

692-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hca_attention(
692+
ggml_tensor * llama_model_deepseek4::graph::build_hca_attention(
693693
llm_graph_input_dsv4 * inp_dsv4,
694694
llm_graph_input_attn_kv_iswa * inp_attn,
695695
ggml_tensor * q,
@@ -746,7 +746,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_hca_attention(
746746
return out;
747747
}
748748

749-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_raw_attention(
749+
ggml_tensor * llama_model_deepseek4::graph::build_raw_attention(
750750
llm_graph_input_attn_kv_iswa * inp_attn,
751751
ggml_tensor * q,
752752
ggml_tensor * kv,
@@ -782,7 +782,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_raw_attention(
782782
return out;
783783
}
784784

785-
ggml_tensor * llama_model_deepseek_v4_flash::graph::build_attention(
785+
ggml_tensor * llama_model_deepseek4::graph::build_attention(
786786
const llama_model & model,
787787
llm_graph_input_dsv4 * inp_dsv4,
788788
ggml_tensor * cur,
@@ -1080,7 +1080,7 @@ ggml_tensor * llama_model_deepseek_v4_flash::graph::build_attention(
10801080
return out;
10811081
}
10821082

1083-
llama_model_deepseek_v4_flash::graph::graph(const llama_model & model, const llm_graph_params & params) :
1083+
llama_model_deepseek4::graph::graph(const llama_model & model, const llm_graph_params & params) :
10841084
llm_graph_context(params) {
10851085
ggml_tensor * cur;
10861086

src/models/models.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1055,8 +1055,8 @@ struct llama_model_deepseek32 : public llama_model_base {
10551055
};
10561056

10571057

1058-
struct llama_model_deepseek_v4_flash : public llama_model_base {
1059-
llama_model_deepseek_v4_flash(const struct llama_model_params & params) : llama_model_base(params) {}
1058+
struct llama_model_deepseek4 : public llama_model_base {
1059+
llama_model_deepseek4(const struct llama_model_params & params) : llama_model_base(params) {}
10601060
void load_arch_hparams(llama_model_loader & ml) override;
10611061
void load_arch_tensors(llama_model_loader & ml) override;
10621062

0 commit comments

Comments
 (0)