Commit 84f8db7

talk-llama : sync llama.cpp
1 parent 4734056 commit 84f8db7

41 files changed: 1719 additions & 2212 deletions

examples/talk-llama/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ if (WHISPER_SDL2)
         unicode.cpp
         unicode-data.cpp
         ${SRC_MODELS})
-    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
+    target_include_directories(${TARGET} PRIVATE . ${SDL2_INCLUDE_DIRS})
 
     target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
     install(TARGETS ${TARGET} RUNTIME)
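
With `.` added to the target's private include path (relative paths in target_include_directories resolve against the current source directory), sources in examples/talk-llama can include the llama.cpp headers synced into that same directory by bare name. A minimal sketch of the kind of include this enables; which specific headers talk-llama actually pulls in is an assumption here:

// sketch: inside examples/talk-llama/*.cpp, these now resolve against
// the synced copies in the same directory (header names assumed)
#include "llama.h"
#include "llama-adapter.h"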

examples/talk-llama/llama-adapter.h

Lines changed: 3 additions & 0 deletions
@@ -39,6 +39,8 @@ struct llama_adapter_cvec {
     std::vector<ggml_tensor *> tensors; // per layer
 };
 
+using llama_adapter_cvec_ptr = std::shared_ptr<llama_adapter_cvec>;
+
 //
 // llama_adapter_lora
 //
@@ -84,3 +86,4 @@ struct llama_adapter_lora {
 };
 
 using llama_adapter_loras = std::unordered_map<llama_adapter_lora *, float>;
+using llama_adapter_loras_ptr = std::unique_ptr<llama_adapter_loras>;
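
The new aliases wrap the adapter types in standard smart pointers, so owners can share a control vector or uniquely hold the LoRA map without manual deletes. A minimal standalone sketch of the ownership pattern; the stub struct bodies are placeholders, not the real llama.cpp definitions:

#include <memory>
#include <unordered_map>

struct llama_adapter_cvec { /* control-vector state elided */ };
struct llama_adapter_lora { /* LoRA state elided */ };

using llama_adapter_cvec_ptr  = std::shared_ptr<llama_adapter_cvec>;
using llama_adapter_loras     = std::unordered_map<llama_adapter_lora *, float>;
using llama_adapter_loras_ptr = std::unique_ptr<llama_adapter_loras>;

int main() {
    // shared ownership: several holders can reference the same control vector
    llama_adapter_cvec_ptr cvec  = std::make_shared<llama_adapter_cvec>();
    llama_adapter_cvec_ptr alias = cvec;          // use_count() == 2

    // unique ownership of the adapter -> scale map; freed automatically
    llama_adapter_loras_ptr loras = std::make_unique<llama_adapter_loras>();
    llama_adapter_lora lora;
    (*loras)[&lora] = 0.5f;                       // apply this adapter at half strength
    return 0;
}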

examples/talk-llama/llama-arch.cpp

Lines changed: 46 additions & 0 deletions
@@ -26,6 +26,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_NEO_BERT, "neo-bert" },
     { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
     { LLM_ARCH_JINA_BERT_V3, "jina-bert-v3" },
+    { LLM_ARCH_EUROBERT, "eurobert" },
     { LLM_ARCH_BLOOM, "bloom" },
     { LLM_ARCH_STABLELM, "stablelm" },
     { LLM_ARCH_QWEN, "qwen" },
@@ -79,6 +80,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_T5, "t5" },
     { LLM_ARCH_T5ENCODER, "t5encoder" },
     { LLM_ARCH_JAIS, "jais" },
+    { LLM_ARCH_JAIS2, "jais2" },
     { LLM_ARCH_NEMOTRON, "nemotron" },
     { LLM_ARCH_NEMOTRON_H, "nemotron_h" },
     { LLM_ARCH_NEMOTRON_H_MOE, "nemotron_h_moe" },
@@ -120,6 +122,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_RND1, "rnd1" },
     { LLM_ARCH_PANGU_EMBED, "pangu-embedded" },
     { LLM_ARCH_MISTRAL3, "mistral3" },
+    { LLM_ARCH_PADDLEOCR, "paddleocr" },
     { LLM_ARCH_MIMO2, "mimo2" },
     { LLM_ARCH_STEP35, "step35" },
     { LLM_ARCH_LLAMA_EMBED, "llama-embed" },
@@ -346,6 +349,7 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
     { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
     { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
     { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+    { LLM_TENSOR_FFN_GATE_UP_EXPS, "blk.%d.ffn_gate_up_exps" },
     { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
     { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
     { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
@@ -367,6 +371,7 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
     { LLM_TENSOR_TOKEN_TYPES, "token_types" },
     { LLM_TENSOR_CLS, "cls" },
     { LLM_TENSOR_CLS_OUT, "cls.output" },
+    { LLM_TENSOR_CLS_NORM, "cls.norm" },
     { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
     { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
     { LLM_TENSOR_SSM_A_NOSCAN, "blk.%d.ssm_a" },
@@ -737,6 +742,7 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
         case LLM_ARCH_INTERNLM2:
         case LLM_ARCH_GRANITE:
         case LLM_ARCH_ERNIE4_5:
+        case LLM_ARCH_PADDLEOCR:
         case LLM_ARCH_SMOLLM3:
         case LLM_ARCH_DREAM:
         case LLM_ARCH_LLADA:
@@ -815,6 +821,20 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_CLS,
                 LLM_TENSOR_CLS_OUT,
             };
+        case LLM_ARCH_EUROBERT:
+            return {
+                LLM_TENSOR_TOKEN_EMBD,
+                LLM_TENSOR_OUTPUT_NORM,
+                LLM_TENSOR_ATTN_NORM,
+                LLM_TENSOR_ATTN_Q,
+                LLM_TENSOR_ATTN_K,
+                LLM_TENSOR_ATTN_V,
+                LLM_TENSOR_ATTN_OUT,
+                LLM_TENSOR_FFN_NORM,
+                LLM_TENSOR_FFN_GATE,
+                LLM_TENSOR_FFN_UP,
+                LLM_TENSOR_FFN_DOWN,
+            };
         case LLM_ARCH_MODERN_BERT:
             return {
                 LLM_TENSOR_TOKEN_EMBD,
@@ -828,6 +848,7 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_FFN_NORM,
                 LLM_TENSOR_CLS,
                 LLM_TENSOR_CLS_OUT,
+                LLM_TENSOR_CLS_NORM,
             };
         case LLM_ARCH_JINA_BERT_V2:
             return {
@@ -984,6 +1005,7 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_FFN_GATE_EXPS,
                 LLM_TENSOR_FFN_DOWN_EXPS,
                 LLM_TENSOR_FFN_UP_EXPS,
+                LLM_TENSOR_FFN_GATE_UP_EXPS,
                 LLM_TENSOR_FFN_GATE_INP_SHEXP,
                 LLM_TENSOR_FFN_GATE_SHEXP,
                 LLM_TENSOR_FFN_DOWN_SHEXP,
@@ -1041,6 +1063,7 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_FFN_GATE_EXPS,
                 LLM_TENSOR_FFN_DOWN_EXPS,
                 LLM_TENSOR_FFN_UP_EXPS,
+                LLM_TENSOR_FFN_GATE_UP_EXPS,
                 LLM_TENSOR_FFN_GATE_INP_SHEXP,
                 LLM_TENSOR_FFN_GATE_SHEXP,
                 LLM_TENSOR_FFN_DOWN_SHEXP,
@@ -1581,6 +1604,7 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_FFN_GATE_EXPS,
                 LLM_TENSOR_FFN_DOWN_EXPS,
                 LLM_TENSOR_FFN_UP_EXPS,
+                LLM_TENSOR_FFN_GATE_UP_EXPS,
                 LLM_TENSOR_FFN_GATE_INP_SHEXP,
                 LLM_TENSOR_FFN_GATE_SHEXP,
                 LLM_TENSOR_FFN_DOWN_SHEXP,
@@ -1633,6 +1657,12 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_FFN_DOWN,
                 LLM_TENSOR_ATTN_POST_NORM,
                 LLM_TENSOR_FFN_POST_NORM,
+                LLM_TENSOR_NEXTN_EH_PROJ,
+                LLM_TENSOR_NEXTN_EMBED_TOKENS,
+                LLM_TENSOR_NEXTN_ENORM,
+                LLM_TENSOR_NEXTN_HNORM,
+                LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
+                LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
             };
         case LLM_ARCH_GLM4_MOE:
             return {
@@ -1783,6 +1813,20 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_FFN_GATE,
                 LLM_TENSOR_FFN_DOWN,
             };
+        case LLM_ARCH_JAIS2:
+            return {
+                LLM_TENSOR_TOKEN_EMBD,
+                LLM_TENSOR_OUTPUT_NORM,
+                LLM_TENSOR_OUTPUT,
+                LLM_TENSOR_ATTN_NORM,
+                LLM_TENSOR_ATTN_Q,
+                LLM_TENSOR_ATTN_K,
+                LLM_TENSOR_ATTN_V,
+                LLM_TENSOR_ATTN_OUT,
+                LLM_TENSOR_FFN_NORM,
+                LLM_TENSOR_FFN_UP,
+                LLM_TENSOR_FFN_DOWN,
+            };
         case LLM_ARCH_NEMOTRON_H:
             return {
                 LLM_TENSOR_TOKEN_EMBD,
@@ -2512,6 +2556,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_OUTPUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CLS, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CLS_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_CLS_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
     {LLM_TENSOR_DENSE_2_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
     {LLM_TENSOR_DENSE_3_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
     {LLM_TENSOR_OUTPUT_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
@@ -2644,6 +2689,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
+    {LLM_TENSOR_FFN_GATE_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_DOWN_CHEXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_GATE_CHEXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_UP_CHEXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
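
Entries in LLM_TENSOR_NAMES such as the new "blk.%d.ffn_gate_up_exps" are printf-style patterns: the per-layer tensor name is produced by substituting the block index. A standalone sketch of that substitution; the trimmed-down table and the tensor_name() helper are illustrative, not llama.cpp's actual name-formatting machinery:

#include <cstdio>
#include <map>
#include <string>

enum llm_tensor { LLM_TENSOR_FFN_GATE_UP_EXPS }; // one entry for the sketch

static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
    { LLM_TENSOR_FFN_GATE_UP_EXPS, "blk.%d.ffn_gate_up_exps" },
};

// format the per-layer tensor name by substituting the block index
static std::string tensor_name(llm_tensor t, int layer) {
    char buf[64];
    std::snprintf(buf, sizeof(buf), LLM_TENSOR_NAMES.at(t), layer);
    return buf;
}

int main() {
    // prints "blk.7.ffn_gate_up_exps"
    std::printf("%s\n", tensor_name(LLM_TENSOR_FFN_GATE_UP_EXPS, 7).c_str());
    return 0;
}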

examples/talk-llama/llama-arch.h

Lines changed: 5 additions & 0 deletions
@@ -30,6 +30,7 @@ enum llm_arch {
     LLM_ARCH_NEO_BERT,
     LLM_ARCH_JINA_BERT_V2,
     LLM_ARCH_JINA_BERT_V3,
+    LLM_ARCH_EUROBERT,
     LLM_ARCH_BLOOM,
     LLM_ARCH_STABLELM,
     LLM_ARCH_QWEN,
@@ -83,6 +84,7 @@ enum llm_arch {
     LLM_ARCH_T5,
     LLM_ARCH_T5ENCODER,
     LLM_ARCH_JAIS,
+    LLM_ARCH_JAIS2,
     LLM_ARCH_NEMOTRON,
     LLM_ARCH_NEMOTRON_H,
     LLM_ARCH_NEMOTRON_H_MOE,
@@ -124,6 +126,7 @@ enum llm_arch {
     LLM_ARCH_RND1,
     LLM_ARCH_PANGU_EMBED,
     LLM_ARCH_MISTRAL3,
+    LLM_ARCH_PADDLEOCR,
     LLM_ARCH_MIMO2,
     LLM_ARCH_STEP35,
     LLM_ARCH_LLAMA_EMBED,
@@ -370,6 +373,7 @@ enum llm_tensor {
     LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
     LLM_TENSOR_FFN_GATE_EXPS,
     LLM_TENSOR_FFN_UP_EXPS,
+    LLM_TENSOR_FFN_GATE_UP_EXPS,
     LLM_TENSOR_FFN_DOWN_SHEXP,
     LLM_TENSOR_FFN_GATE_SHEXP,
     LLM_TENSOR_FFN_UP_SHEXP,
@@ -497,6 +501,7 @@ enum llm_tensor {
     LLM_TENSOR_ENC_OUTPUT_NORM,
     LLM_TENSOR_CLS,
     LLM_TENSOR_CLS_OUT,
+    LLM_TENSOR_CLS_NORM,
     LLM_TENSOR_CONV1D,
     LLM_TENSOR_CONVNEXT_DW,
     LLM_TENSOR_CONVNEXT_NORM,
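
Each new architecture is registered in two places that must stay in sync: the llm_arch enum here in llama-arch.h and the matching string in LLM_ARCH_NAMES in llama-arch.cpp. A standalone sketch of the lookup pattern this pair supports; the trimmed-down enum, table, and arch_name() helper are illustrative stand-ins, not the real llama.cpp API:

#include <cstdio>
#include <map>

// stand-ins for the real enum and name table
enum llm_arch { LLM_ARCH_EUROBERT, LLM_ARCH_JAIS2, LLM_ARCH_PADDLEOCR, LLM_ARCH_UNKNOWN };

static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_EUROBERT, "eurobert" },
    { LLM_ARCH_JAIS2, "jais2" },
    { LLM_ARCH_PADDLEOCR, "paddleocr" },
};

// map an arch id back to its name; ids without an entry fall through
static const char * arch_name(llm_arch arch) {
    auto it = LLM_ARCH_NAMES.find(arch);
    return it == LLM_ARCH_NAMES.end() ? "(unknown)" : it->second;
}

int main() {
    std::printf("%s\n", arch_name(LLM_ARCH_JAIS2)); // prints "jais2"
    return 0;
}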
