@@ -70,17 +70,17 @@ ggml_cgraph * clip_graph_llava::build() {
 
         // self-attention
         {
-            ggml_tensor * Qcur = ggml_mul_mat(ctx0, layer.q_w, cur);
+            ggml_tensor * Qcur = build_mm(layer.q_w, cur);
             if (layer.q_b) {
                 Qcur = ggml_add(ctx0, Qcur, layer.q_b);
             }
 
-            ggml_tensor * Kcur = ggml_mul_mat(ctx0, layer.k_w, cur);
+            ggml_tensor * Kcur = build_mm(layer.k_w, cur);
             if (layer.k_b) {
                 Kcur = ggml_add(ctx0, Kcur, layer.k_b);
             }
 
-            ggml_tensor * Vcur = ggml_mul_mat(ctx0, layer.v_w, cur);
+            ggml_tensor * Vcur = build_mm(layer.v_w, cur);
             if (layer.v_b) {
                 Vcur = ggml_add(ctx0, Vcur, layer.v_b);
             }
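
The helper itself is not shown in these hunks. A minimal sketch of what `build_mm` plausibly is, assuming it is simply a `clip_graph_llava` member that binds the graph context so call sites no longer thread `ctx0` through every matmul (the real helper may do more, e.g. weight-type handling):

```cpp
// Hedged sketch, not code from this commit: a thin wrapper around
// ggml_mul_mat that captures the member context ctx0, so callers write
// build_mm(w, x) instead of ggml_mul_mat(ctx0, w, x).
ggml_tensor * clip_graph_llava::build_mm(ggml_tensor * w, ggml_tensor * x) {
    return ggml_mul_mat(ctx0, w, x); // same semantics as the old call sites
}
```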
@@ -164,17 +164,17 @@ ggml_cgraph * clip_graph_llava::build() {
 
     // llava projector
     if (proj_type == PROJECTOR_TYPE_MLP) {
-        embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
+        embeddings = build_mm(model.mm_0_w, embeddings);
         embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);
 
         embeddings = ggml_gelu(ctx0, embeddings);
         if (model.mm_2_w) {
-            embeddings = ggml_mul_mat(ctx0, model.mm_2_w, embeddings);
+            embeddings = build_mm(model.mm_2_w, embeddings);
             embeddings = ggml_add(ctx0, embeddings, model.mm_2_b);
         }
     }
     else if (proj_type == PROJECTOR_TYPE_MLP_NORM) {
-        embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings);
+        embeddings = build_mm(model.mm_0_w, embeddings);
         embeddings = ggml_add(ctx0, embeddings, model.mm_0_b);
         // ggml_tensor_printf(embeddings, "mm_0_w", 0, true, false);
         // First LayerNorm
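
For reference, the plain `PROJECTOR_TYPE_MLP` path above is a standard two-layer MLP with an optional second projection. A standalone sketch (the function name and parameter passing are illustrative, not this commit's code):

```cpp
// Hedged sketch of the MLP projector: linear -> GELU -> optional linear.
static ggml_tensor * mlp_project(ggml_context * ctx0,
                                 ggml_tensor * w0, ggml_tensor * b0,
                                 ggml_tensor * w2, ggml_tensor * b2,
                                 ggml_tensor * v) {
    ggml_tensor * h = ggml_mul_mat(ctx0, w0, v);
    h = ggml_add(ctx0, h, b0);
    h = ggml_gelu(ctx0, h);
    if (w2) { // mirrors the `if (model.mm_2_w)` check above: layer 2 may be absent
        h = ggml_mul_mat(ctx0, w2, h);
        h = ggml_add(ctx0, h, b2);
    }
    return h;
}
```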
@@ -186,7 +186,7 @@ ggml_cgraph * clip_graph_llava::build() {
         embeddings = ggml_gelu(ctx0, embeddings);
 
         // Second linear layer
-        embeddings = ggml_mul_mat(ctx0, model.mm_3_w, embeddings);
+        embeddings = build_mm(model.mm_3_w, embeddings);
         embeddings = ggml_add(ctx0, embeddings, model.mm_3_b);
 
         // Second LayerNorm
@@ -197,10 +197,10 @@ ggml_cgraph * clip_graph_llava::build() {
     else if (proj_type == PROJECTOR_TYPE_LDP) {
         // MobileVLM projector
         int n_patch = 24;
-        ggml_tensor * mlp_1 = ggml_mul_mat(ctx0, model.mm_model_mlp_1_w, embeddings);
+        ggml_tensor * mlp_1 = build_mm(model.mm_model_mlp_1_w, embeddings);
         mlp_1 = ggml_add(ctx0, mlp_1, model.mm_model_mlp_1_b);
         mlp_1 = ggml_gelu(ctx0, mlp_1);
-        ggml_tensor * mlp_3 = ggml_mul_mat(ctx0, model.mm_model_mlp_3_w, mlp_1);
+        ggml_tensor * mlp_3 = build_mm(model.mm_model_mlp_3_w, mlp_1);
         mlp_3 = ggml_add(ctx0, mlp_3, model.mm_model_mlp_3_b);
         // mlp_3 shape = [1, 576, 2048], ne = [2048, 576, 1, 1]
 
@@ -229,10 +229,10 @@ ggml_cgraph * clip_graph_llava::build() {
         // block_1 shape = [1, 2048, 1, 1], ne = [1, 1, 2048, 1]
         // pointwise conv
         block_1 = ggml_reshape_2d(ctx0, block_1, block_1->ne[0]*block_1->ne[1]*block_1->ne[2], block_1->ne[3]);
-        block_1 = ggml_mul_mat(ctx0, model.mm_model_block_1_block_1_fc1_w, block_1);
+        block_1 = build_mm(model.mm_model_block_1_block_1_fc1_w, block_1);
         block_1 = ggml_add(ctx0, block_1, model.mm_model_block_1_block_1_fc1_b);
         block_1 = ggml_relu(ctx0, block_1);
-        block_1 = ggml_mul_mat(ctx0, model.mm_model_block_1_block_1_fc2_w, block_1);
+        block_1 = build_mm(model.mm_model_block_1_block_1_fc2_w, block_1);
         block_1 = ggml_add(ctx0, block_1, model.mm_model_block_1_block_1_fc2_b);
         block_1 = ggml_hardsigmoid(ctx0, block_1);
         // block_1_hw shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1], block_1 shape = [1, 2048], ne = [2048, 1, 1, 1]
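
The fc1 → ReLU → fc2 → hardsigmoid sequence above is the squeeze-excitation half of the MobileVLM block: globally pooled features produce a per-channel gate in [0, 1] that is later multiplied back into the full feature map. A hedged sketch of just that gate (illustrative names, not this commit's code):

```cpp
// Squeeze-excitation gate. `pooled` is the globally averaged feature
// vector, ne = [C, N]; returns a per-channel multiplier in [0, 1].
static ggml_tensor * se_gate(ggml_context * ctx0, ggml_tensor * pooled,
                             ggml_tensor * fc1_w, ggml_tensor * fc1_b,
                             ggml_tensor * fc2_w, ggml_tensor * fc2_b) {
    ggml_tensor * g = ggml_mul_mat(ctx0, fc1_w, pooled); // squeeze to bottleneck
    g = ggml_add(ctx0, g, fc1_b);
    g = ggml_relu(ctx0, g);
    g = ggml_mul_mat(ctx0, fc2_w, g);                    // expand back to C channels
    g = ggml_add(ctx0, g, fc2_b);
    return ggml_hardsigmoid(ctx0, g);                    // smooth 0..1 gate
}
```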
@@ -244,7 +244,7 @@ ggml_cgraph * clip_graph_llava::build() {
         block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 0, 2, 3));
 
         // block_1 shape = [1, 24*24, 2048], ne = [24*24, 2048, 1]
-        block_1 = ggml_mul_mat(ctx0, model.mm_model_block_1_block_2_0_w, block_1);
+        block_1 = build_mm(model.mm_model_block_1_block_2_0_w, block_1);
         block_1 = ggml_reshape_4d(ctx0, block_1, block_1->ne[0], w, h, block_1->ne[3]);
 
         // block_1 shape = [1, 24, 24, 2048], ne = [2048, 24, 24, 1]
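
The permute → `build_mm` → reshape sequence above is the ggml idiom for a 1×1 (pointwise) convolution: put channels on the fastest dimension, multiply by a `[C_in, C_out]` weight, then restore the spatial layout. A hedged sketch with illustrative names (`x`, `conv_w`, `w`, `h` are assumptions, not identifiers from this commit):

```cpp
// Pointwise conv as a matmul. x has ne = [w*h, C_in, 1, 1] after the
// spatial dims were flattened; conv_w is the 1x1 kernel stored as [C_in, C_out].
ggml_tensor * t = ggml_cont(ctx0, ggml_permute(ctx0, x, 1, 0, 2, 3)); // ne = [C_in, w*h, 1, 1]
t = ggml_mul_mat(ctx0, conv_w, t);                                    // ne = [C_out, w*h, 1, 1]
t = ggml_reshape_4d(ctx0, t, t->ne[0], w, h, t->ne[3]);               // ne = [C_out, w, h, 1]
```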
@@ -277,10 +277,10 @@ ggml_cgraph * clip_graph_llava::build() {
         // block_1 shape = [1, 2048, 1, 1], ne = [1, 1, 2048, 1]
         // pointwise conv
         block_1 = ggml_reshape_2d(ctx0, block_1, block_1->ne[0]*block_1->ne[1]*block_1->ne[2], block_1->ne[3]);
-        block_1 = ggml_mul_mat(ctx0, model.mm_model_block_2_block_1_fc1_w, block_1);
+        block_1 = build_mm(model.mm_model_block_2_block_1_fc1_w, block_1);
         block_1 = ggml_add(ctx0, block_1, model.mm_model_block_2_block_1_fc1_b);
         block_1 = ggml_relu(ctx0, block_1);
-        block_1 = ggml_mul_mat(ctx0, model.mm_model_block_2_block_1_fc2_w, block_1);
+        block_1 = build_mm(model.mm_model_block_2_block_1_fc2_w, block_1);
         block_1 = ggml_add(ctx0, block_1, model.mm_model_block_2_block_1_fc2_b);
         block_1 = ggml_hardsigmoid(ctx0, block_1);
 
@@ -292,7 +292,7 @@ ggml_cgraph * clip_graph_llava::build() {
         block_1 = ggml_reshape_3d(ctx0, block_1, w*h, block_1->ne[2], block_1->ne[3]);
         block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 0, 2, 3));
         // block_1 shape = [1, 24*24, 2048], ne = [24*24, 2048, 1]
-        block_1 = ggml_mul_mat(ctx0, model.mm_model_block_2_block_2_0_w, block_1);
+        block_1 = build_mm(model.mm_model_block_2_block_2_0_w, block_1);
         block_1 = ggml_reshape_4d(ctx0, block_1, block_1->ne[0], w, h, block_1->ne[3]);
 
 
@@ -307,10 +307,10 @@ ggml_cgraph * clip_graph_llava::build() {
     else if (proj_type == PROJECTOR_TYPE_LDPV2)
     {
         int n_patch = 24;
-        ggml_tensor * mlp_0 = ggml_mul_mat(ctx0, model.mm_model_mlp_0_w, embeddings);
+        ggml_tensor * mlp_0 = build_mm(model.mm_model_mlp_0_w, embeddings);
         mlp_0 = ggml_add(ctx0, mlp_0, model.mm_model_mlp_0_b);
         mlp_0 = ggml_gelu(ctx0, mlp_0);
-        ggml_tensor * mlp_2 = ggml_mul_mat(ctx0, model.mm_model_mlp_2_w, mlp_0);
+        ggml_tensor * mlp_2 = build_mm(model.mm_model_mlp_2_w, mlp_0);
         mlp_2 = ggml_add(ctx0, mlp_2, model.mm_model_mlp_2_b);
         // mlp_2 ne = [2048, 576, 1, 1]
         // // AVG Pool Layer 2*2, strides = 2
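
The trailing comment refers to a 2×2 average pooling with stride 2 that quarters the token count (576 patches → 144). In ggml this is a single `ggml_pool_2d` call once the tensor is back in a spatial `[W, H, C, N]` layout; a hedged sketch of that step (the exact permute/reshape choreography in the real file may differ):

```cpp
// Hedged sketch: bring patches onto the two leading dims, then average-pool
// 2x2 with stride 2. mlp_2 starts as ne = [2048, 576, 1, 1].
ggml_tensor * t = ggml_cont(ctx0, ggml_permute(ctx0, mlp_2, 1, 0, 2, 3)); // ne = [576, 2048, 1, 1]
t = ggml_reshape_4d(ctx0, t, n_patch, n_patch, t->ne[1], t->ne[2]);       // ne = [24, 24, 2048, 1]
t = ggml_pool_2d(ctx0, t, GGML_OP_POOL_AVG, 2, 2, 2, 2, 0, 0);            // ne = [12, 12, 2048, 1]
```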
@@ -344,15 +344,15 @@ ggml_cgraph * clip_graph_llava::build() {
         embeddings = ggml_add(ctx0, embeddings, model.mm_model_adapter_conv_b);
         // GLU
         {
-            embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_0_w, embeddings);
+            embeddings = build_mm(model.mm_model_mlp_0_w, embeddings);
             embeddings = ggml_norm(ctx0, embeddings, eps);
             embeddings = ggml_add(ctx0, ggml_mul(ctx0, embeddings, model.mm_model_ln_q_w), model.mm_model_ln_q_b);
             embeddings = ggml_gelu_inplace(ctx0, embeddings);
             ggml_tensor * x = embeddings;
-            embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_2_w, embeddings);
-            x = ggml_mul_mat(ctx0, model.mm_model_mlp_1_w, x);
+            embeddings = build_mm(model.mm_model_mlp_2_w, embeddings);
+            x = build_mm(model.mm_model_mlp_1_w, x);
             embeddings = ggml_swiglu_split(ctx0, embeddings, x);
-            embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_3_w, embeddings);
+            embeddings = build_mm(model.mm_model_mlp_3_w, embeddings);
         }
         // arrangement of BOI/EOI token embeddings
         // note: these embeddings are not present in text model, hence we cannot process them as text tokens
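
The GLU block above projects the activations twice and fuses the branches with `ggml_swiglu_split`. An unfused equivalent, assuming the op applies SiLU to its first argument and multiplies elementwise by the second (verify the argument order against the ggml definition before relying on this):

```cpp
// Hedged sketch: unfused SwiGLU over two separately projected branches.
// `cur` stands in for the post-GELU activations; branch roles are assumptions.
ggml_tensor * a = build_mm(model.mm_model_mlp_2_w, cur); // gate branch
ggml_tensor * b = build_mm(model.mm_model_mlp_1_w, cur); // value branch
ggml_tensor * h = ggml_mul(ctx0, ggml_silu(ctx0, a), b); // SiLU(a) * b
h = build_mm(model.mm_model_mlp_3_w, h);                 // down-projection
```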