@@ -1059,12 +1059,16 @@ llm_graph_qkv llm_graph_context::build_qkv(
10591059
10601060 if (layer.wqkv ) {
10611061 // fused QKV path
1062- ggml_tensor * qkv = build_lora_mm (layer.wqkv , cur);
1062+ ggml_tensor * qkv = build_lora_mm (layer.wqkv , cur, layer. wqkv_s );
10631063 cb (qkv, " wqkv" , il);
10641064 if (layer.bqkv ) {
10651065 qkv = ggml_add (ctx0, qkv, layer.bqkv );
10661066 cb (qkv, " bqkv" , il);
10671067 }
1068+ if (hparams.f_clamp_kqv > 0 .0f ) {
1069+ qkv = ggml_clamp (ctx0, qkv, -hparams.f_clamp_kqv , hparams.f_clamp_kqv );
1070+ cb (qkv, " wqkv_clamped" , il);
1071+ }
10681072 Qcur = ggml_view_3d (ctx0, qkv, n_embd_head, n_head, n_tokens,
10691073 ggml_element_size (qkv) * n_embd_head, qkv->nb [1 ], 0 );
10701074 Kcur = ggml_view_3d (ctx0, qkv, n_embd_head, n_head_kv, n_tokens,
@@ -1081,18 +1085,30 @@ llm_graph_qkv llm_graph_context::build_qkv(
10811085 Qcur = ggml_add (ctx0, Qcur, layer.bq );
10821086 cb (Qcur, " Qcur" , il);
10831087 }
1088+ if (hparams.f_clamp_kqv > 0 .0f ) {
1089+ Qcur = ggml_clamp (ctx0, Qcur, -hparams.f_clamp_kqv , hparams.f_clamp_kqv );
1090+ cb (Qcur, " Qcur_clamped" , il);
1091+ }
10841092 Kcur = build_lora_mm (layer.wk , cur, layer.wk_s );
10851093 cb (Kcur, " Kcur" , il);
10861094 if (layer.bk ) {
10871095 Kcur = ggml_add (ctx0, Kcur, layer.bk );
10881096 cb (Kcur, " Kcur" , il);
10891097 }
1098+ if (hparams.f_clamp_kqv > 0 .0f ) {
1099+ Kcur = ggml_clamp (ctx0, Kcur, -hparams.f_clamp_kqv , hparams.f_clamp_kqv );
1100+ cb (Kcur, " Kcur_clamped" , il);
1101+ }
10901102 Vcur = build_lora_mm (layer.wv , cur, layer.wv_s );
10911103 cb (Vcur, " Vcur" , il);
10921104 if (layer.bv ) {
10931105 Vcur = ggml_add (ctx0, Vcur, layer.bv );
10941106 cb (Vcur, " Vcur" , il);
10951107 }
1108+ if (hparams.f_clamp_kqv > 0 .0f ) {
1109+ Vcur = ggml_clamp (ctx0, Vcur, -hparams.f_clamp_kqv , hparams.f_clamp_kqv );
1110+ cb (Vcur, " Vcur_clamped" , il);
1111+ }
10961112 Qcur = ggml_reshape_3d (ctx0, Qcur, n_embd_head, n_head, n_tokens);
10971113 Kcur = ggml_reshape_3d (ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
10981114 Vcur = ggml_reshape_3d (ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
@@ -1105,6 +1121,7 @@ llm_graph_qkv llm_graph_context::build_qkv(
11051121 return { Qcur, Kcur, Vcur };
11061122}
11071123
1124+
11081125ggml_tensor * llm_graph_context::build_ffn (
11091126 ggml_tensor * cur,
11101127 ggml_tensor * up,
0 commit comments