@@ -3217,8 +3217,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
32173217 cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"), {hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
32183218 }
32193219
3220- tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
3221- tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias") , {n_embd}, 0);
3220+ tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight", 0 ), {n_embd}, 0);
3221+ tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias", 0), {n_embd}, 0);
32223222
32233223 for (int i = 0; i < n_layer; ++i) {
32243224 auto & layer = layers[i];
@@ -3265,7 +3265,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
32653265 case LLM_ARCH_MODERN_BERT:
32663266 {
32673267 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
3268- tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
3268+ tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight", 0 ), {n_embd}, 0);
32693269
32703270 output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
32713271
@@ -3348,8 +3348,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
33483348 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); // word_embeddings
33493349 type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, 0); // token_type_embeddings
33503350
3351- tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0); // LayerNorm
3352- tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias") , {n_embd}, 0); //LayerNorm bias
3351+ tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight", 0 ), {n_embd}, 0); // LayerNorm
3352+ tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias", 0), {n_embd}, 0); // LayerNorm bias
33533353
33543354 cls = create_tensor(tn(LLM_TENSOR_CLS, "weight"), {n_embd, 1}, TENSOR_NOT_REQUIRED);
33553355 cls_b = create_tensor(tn(LLM_TENSOR_CLS, "bias"), {1}, TENSOR_NOT_REQUIRED);
@@ -3400,8 +3400,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
34003400 case LLM_ARCH_BLOOM:
34013401 {
34023402 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
3403- tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
3404- tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias") , {n_embd}, 0);
3403+ tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight", 0 ), {n_embd}, 0);
3404+ tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias", 0), {n_embd}, 0);
34053405
34063406 // output
34073407 output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
@@ -5780,8 +5780,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
57805780 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
57815781
57825782 // Block 0, LN0
5783- tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
5784- tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}, 0);
5783+ tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight", 0 ), {n_embd}, 0);
5784+ tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias", 0 ), {n_embd}, 0);
57855785
57865786 // output
57875787 output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
@@ -5895,8 +5895,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
58955895 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
58965896
58975897 // Block 0, LN0
5898- tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
5899- tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}, 0);
5898+ tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight", 0 ), {n_embd}, 0);
5899+ tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias", 0 ), {n_embd}, 0);
59005900
59015901 // output
59025902 output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
@@ -6067,8 +6067,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
60676067 {
60686068 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {hparams.n_embd, n_vocab}, 0);
60696069
6070- conv1d = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, hparams.n_embd, hparams.posnet.n_embd}, 0);
6071- conv1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias") , {1, hparams.posnet.n_embd}, 0);
6070+ conv1d = create_tensor(tn(LLM_TENSOR_CONV1D, "weight", 0 ), {7, hparams.n_embd, hparams.posnet.n_embd}, 0);
6071+ conv1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias", 0), {1, hparams.posnet.n_embd}, 0);
60726072
60736073 // posnet
60746074 {
@@ -6133,8 +6133,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
61336133
61346134 GGML_ASSERT(hparams.posnet.n_embd == hparams.convnext.n_embd);
61356135
6136- tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {hparams.posnet.n_embd}, 0);
6137- tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias") , {hparams.posnet.n_embd}, 0);
6136+ tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight", 0 ), {hparams.posnet.n_embd}, 0);
6137+ tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias", 0), {hparams.posnet.n_embd}, 0);
61386138
61396139 // convnext
61406140 {
0 commit comments