@@ -195,3 +195,213 @@ static const std::map<vad_tensor, const char *> VAD_TENSOR_NAMES = {
195195 {VAD_TENSOR_FINAL_CONV_WEIGHT , " _model.decoder.decoder.2.weight" },
196196 {VAD_TENSOR_FINAL_CONV_BIAS , " _model.decoder.decoder.2.bias" }
197197};
198+
// Identifiers for the individual tensors of a Parakeet model.
//
// The enumerators are grouped by model section (preprocessor, encoder
// pre_encode, encoder layers, decoder, joint network). They are used as the
// key type of PARAKEET_TENSOR_NAMES and PARAKEET_TENSOR_INFO.
//
// No explicit values are assigned, so numbering starts at 0 and follows the
// declaration order; inserting or reordering entries changes the numeric
// value of every enumerator that comes after.
enum parakeet_tensor {
    // Preprocessor
    PARAKEET_TENSOR_PREPROC_WINDOW,
    PARAKEET_TENSOR_PREPROC_FB,

    // Encoder pre_encode
    PARAKEET_TENSOR_ENC_PRE_OUT_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_OUT_BIAS,
    PARAKEET_TENSOR_ENC_PRE_CONV_0_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_CONV_0_BIAS,
    PARAKEET_TENSOR_ENC_PRE_CONV_2_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_CONV_2_BIAS,
    PARAKEET_TENSOR_ENC_PRE_CONV_3_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_CONV_3_BIAS,
    PARAKEET_TENSOR_ENC_PRE_CONV_5_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_CONV_5_BIAS,
    PARAKEET_TENSOR_ENC_PRE_CONV_6_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_CONV_6_BIAS,

    // Encoder layers (one set of these tensors exists per layer)
    PARAKEET_TENSOR_ENC_NORM_FF1_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_FF1_BIAS,
    PARAKEET_TENSOR_ENC_FF1_LINEAR1_WEIGHT,
    PARAKEET_TENSOR_ENC_FF1_LINEAR2_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_CONV_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_CONV_BIAS,
    PARAKEET_TENSOR_ENC_CONV_PW1_WEIGHT,
    PARAKEET_TENSOR_ENC_CONV_DW_WEIGHT,
    PARAKEET_TENSOR_ENC_CONV_BN_WEIGHT,
    PARAKEET_TENSOR_ENC_CONV_BN_BIAS,
    PARAKEET_TENSOR_ENC_CONV_BN_MEAN,
    PARAKEET_TENSOR_ENC_CONV_BN_VAR,
    PARAKEET_TENSOR_ENC_CONV_BN_NUM_BATCHES,
    PARAKEET_TENSOR_ENC_CONV_PW2_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_ATTN_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_ATTN_BIAS,
    PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_U,
    PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_V,
    PARAKEET_TENSOR_ENC_ATTN_Q_WEIGHT,
    PARAKEET_TENSOR_ENC_ATTN_K_WEIGHT,
    PARAKEET_TENSOR_ENC_ATTN_V_WEIGHT,
    PARAKEET_TENSOR_ENC_ATTN_OUT_WEIGHT,
    PARAKEET_TENSOR_ENC_ATTN_POS_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_FF2_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_FF2_BIAS,
    PARAKEET_TENSOR_ENC_FF2_LINEAR1_WEIGHT,
    PARAKEET_TENSOR_ENC_FF2_LINEAR2_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_OUT_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_OUT_BIAS,

    // Decoder
    PARAKEET_TENSOR_DEC_EMBED_WEIGHT,
    PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_IH,
    PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_HH,
    PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_IH,
    PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_HH,
    PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_IH,
    PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_HH,
    PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_IH,
    PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_HH,

    // Joint network
    PARAKEET_TENSOR_JOINT_PRED_WEIGHT,
    PARAKEET_TENSOR_JOINT_PRED_BIAS,
    PARAKEET_TENSOR_JOINT_ENC_WEIGHT,
    PARAKEET_TENSOR_JOINT_ENC_BIAS,
    PARAKEET_TENSOR_JOINT_NET_WEIGHT,
    PARAKEET_TENSOR_JOINT_NET_BIAS,
};
268+
269+ static const std::map<parakeet_tensor, const char *> PARAKEET_TENSOR_NAMES = {
270+ // Preprocessor
271+ {PARAKEET_TENSOR_PREPROC_WINDOW , " preprocessor.featurizer.window" },
272+ {PARAKEET_TENSOR_PREPROC_FB , " preprocessor.featurizer.fb" },
273+
274+ // Encoder pre_encode
275+ {PARAKEET_TENSOR_ENC_PRE_OUT_WEIGHT , " encoder.pre_encode.out.weight" },
276+ {PARAKEET_TENSOR_ENC_PRE_OUT_BIAS , " encoder.pre_encode.out.bias" },
277+ {PARAKEET_TENSOR_ENC_PRE_CONV_0_WEIGHT , " encoder.pre_encode.conv.0.weight" },
278+ {PARAKEET_TENSOR_ENC_PRE_CONV_0_BIAS , " encoder.pre_encode.conv.0.bias" },
279+ {PARAKEET_TENSOR_ENC_PRE_CONV_2_WEIGHT , " encoder.pre_encode.conv.2.weight" },
280+ {PARAKEET_TENSOR_ENC_PRE_CONV_2_BIAS , " encoder.pre_encode.conv.2.bias" },
281+ {PARAKEET_TENSOR_ENC_PRE_CONV_3_WEIGHT , " encoder.pre_encode.conv.3.weight" },
282+ {PARAKEET_TENSOR_ENC_PRE_CONV_3_BIAS , " encoder.pre_encode.conv.3.bias" },
283+ {PARAKEET_TENSOR_ENC_PRE_CONV_5_WEIGHT , " encoder.pre_encode.conv.5.weight" },
284+ {PARAKEET_TENSOR_ENC_PRE_CONV_5_BIAS , " encoder.pre_encode.conv.5.bias" },
285+ {PARAKEET_TENSOR_ENC_PRE_CONV_6_WEIGHT , " encoder.pre_encode.conv.6.weight" },
286+ {PARAKEET_TENSOR_ENC_PRE_CONV_6_BIAS , " encoder.pre_encode.conv.6.bias" },
287+
288+ // Encoder layers (use %d for layer number)
289+ {PARAKEET_TENSOR_ENC_NORM_FF1_WEIGHT , " encoder.layers.%d.norm_feed_forward1.weight" },
290+ {PARAKEET_TENSOR_ENC_NORM_FF1_BIAS , " encoder.layers.%d.norm_feed_forward1.bias" },
291+ {PARAKEET_TENSOR_ENC_FF1_LINEAR1_WEIGHT , " encoder.layers.%d.feed_forward1.linear1.weight" },
292+ {PARAKEET_TENSOR_ENC_FF1_LINEAR2_WEIGHT , " encoder.layers.%d.feed_forward1.linear2.weight" },
293+ {PARAKEET_TENSOR_ENC_NORM_CONV_WEIGHT , " encoder.layers.%d.norm_conv.weight" },
294+ {PARAKEET_TENSOR_ENC_NORM_CONV_BIAS , " encoder.layers.%d.norm_conv.bias" },
295+ {PARAKEET_TENSOR_ENC_CONV_PW1_WEIGHT , " encoder.layers.%d.conv.pointwise_conv1.weight" },
296+ {PARAKEET_TENSOR_ENC_CONV_DW_WEIGHT , " encoder.layers.%d.conv.depthwise_conv.weight" },
297+ {PARAKEET_TENSOR_ENC_CONV_BN_WEIGHT , " encoder.layers.%d.conv.batch_norm.weight" },
298+ {PARAKEET_TENSOR_ENC_CONV_BN_BIAS , " encoder.layers.%d.conv.batch_norm.bias" },
299+ {PARAKEET_TENSOR_ENC_CONV_BN_MEAN , " encoder.layers.%d.conv.batch_norm.running_mean" },
300+ {PARAKEET_TENSOR_ENC_CONV_BN_VAR , " encoder.layers.%d.conv.batch_norm.running_var" },
301+ {PARAKEET_TENSOR_ENC_CONV_BN_NUM_BATCHES , " encoder.layers.%d.conv.batch_norm.num_batches_tracked" },
302+ {PARAKEET_TENSOR_ENC_CONV_PW2_WEIGHT , " encoder.layers.%d.conv.pointwise_conv2.weight" },
303+ {PARAKEET_TENSOR_ENC_NORM_ATTN_WEIGHT , " encoder.layers.%d.norm_self_att.weight" },
304+ {PARAKEET_TENSOR_ENC_NORM_ATTN_BIAS , " encoder.layers.%d.norm_self_att.bias" },
305+ {PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_U , " encoder.layers.%d.self_attn.pos_bias_u" },
306+ {PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_V , " encoder.layers.%d.self_attn.pos_bias_v" },
307+ {PARAKEET_TENSOR_ENC_ATTN_Q_WEIGHT , " encoder.layers.%d.self_attn.linear_q.weight" },
308+ {PARAKEET_TENSOR_ENC_ATTN_K_WEIGHT , " encoder.layers.%d.self_attn.linear_k.weight" },
309+ {PARAKEET_TENSOR_ENC_ATTN_V_WEIGHT , " encoder.layers.%d.self_attn.linear_v.weight" },
310+ {PARAKEET_TENSOR_ENC_ATTN_OUT_WEIGHT , " encoder.layers.%d.self_attn.linear_out.weight" },
311+ {PARAKEET_TENSOR_ENC_ATTN_POS_WEIGHT , " encoder.layers.%d.self_attn.linear_pos.weight" },
312+ {PARAKEET_TENSOR_ENC_NORM_FF2_WEIGHT , " encoder.layers.%d.norm_feed_forward2.weight" },
313+ {PARAKEET_TENSOR_ENC_NORM_FF2_BIAS , " encoder.layers.%d.norm_feed_forward2.bias" },
314+ {PARAKEET_TENSOR_ENC_FF2_LINEAR1_WEIGHT , " encoder.layers.%d.feed_forward2.linear1.weight" },
315+ {PARAKEET_TENSOR_ENC_FF2_LINEAR2_WEIGHT , " encoder.layers.%d.feed_forward2.linear2.weight" },
316+ {PARAKEET_TENSOR_ENC_NORM_OUT_WEIGHT , " encoder.layers.%d.norm_out.weight" },
317+ {PARAKEET_TENSOR_ENC_NORM_OUT_BIAS , " encoder.layers.%d.norm_out.bias" },
318+
319+ // Decoder
320+ {PARAKEET_TENSOR_DEC_EMBED_WEIGHT , " decoder.prediction.embed.weight" },
321+ {PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_IH , " decoder.prediction.dec_rnn.lstm.weight_ih_l0" },
322+ {PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_HH , " decoder.prediction.dec_rnn.lstm.weight_hh_l0" },
323+ {PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_IH , " decoder.prediction.dec_rnn.lstm.bias_ih_l0" },
324+ {PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_HH , " decoder.prediction.dec_rnn.lstm.bias_hh_l0" },
325+ {PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_IH , " decoder.prediction.dec_rnn.lstm.weight_ih_l1" },
326+ {PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_HH , " decoder.prediction.dec_rnn.lstm.weight_hh_l1" },
327+ {PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_IH , " decoder.prediction.dec_rnn.lstm.bias_ih_l1" },
328+ {PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_HH , " decoder.prediction.dec_rnn.lstm.bias_hh_l1" },
329+
330+ // Joint network
331+ {PARAKEET_TENSOR_JOINT_PRED_WEIGHT , " joint.pred.weight" },
332+ {PARAKEET_TENSOR_JOINT_PRED_BIAS , " joint.pred.bias" },
333+ {PARAKEET_TENSOR_JOINT_ENC_WEIGHT , " joint.enc.weight" },
334+ {PARAKEET_TENSOR_JOINT_ENC_BIAS , " joint.enc.bias" },
335+ {PARAKEET_TENSOR_JOINT_NET_WEIGHT , " joint.joint_net.2.weight" },
336+ {PARAKEET_TENSOR_JOINT_NET_BIAS , " joint.joint_net.2.bias" },
337+ };
338+
339+ static const std::map<parakeet_tensor, ggml_op> PARAKEET_TENSOR_INFO = {
340+ // Preprocessor
341+ {PARAKEET_TENSOR_PREPROC_WINDOW , GGML_OP_MUL },
342+ {PARAKEET_TENSOR_PREPROC_FB , GGML_OP_MUL_MAT },
343+
344+ // Encoder pre_encode
345+ {PARAKEET_TENSOR_ENC_PRE_OUT_WEIGHT , GGML_OP_MUL_MAT },
346+ {PARAKEET_TENSOR_ENC_PRE_OUT_BIAS , GGML_OP_ADD },
347+ {PARAKEET_TENSOR_ENC_PRE_CONV_0_WEIGHT , GGML_OP_IM2COL },
348+ {PARAKEET_TENSOR_ENC_PRE_CONV_0_BIAS , GGML_OP_ADD },
349+ {PARAKEET_TENSOR_ENC_PRE_CONV_2_WEIGHT , GGML_OP_IM2COL },
350+ {PARAKEET_TENSOR_ENC_PRE_CONV_2_BIAS , GGML_OP_ADD },
351+ {PARAKEET_TENSOR_ENC_PRE_CONV_3_WEIGHT , GGML_OP_IM2COL },
352+ {PARAKEET_TENSOR_ENC_PRE_CONV_3_BIAS , GGML_OP_ADD },
353+ {PARAKEET_TENSOR_ENC_PRE_CONV_5_WEIGHT , GGML_OP_IM2COL },
354+ {PARAKEET_TENSOR_ENC_PRE_CONV_5_BIAS , GGML_OP_ADD },
355+ {PARAKEET_TENSOR_ENC_PRE_CONV_6_WEIGHT , GGML_OP_IM2COL },
356+ {PARAKEET_TENSOR_ENC_PRE_CONV_6_BIAS , GGML_OP_ADD },
357+
358+ // Encoder layers
359+ {PARAKEET_TENSOR_ENC_NORM_FF1_WEIGHT , GGML_OP_MUL },
360+ {PARAKEET_TENSOR_ENC_NORM_FF1_BIAS , GGML_OP_ADD },
361+ {PARAKEET_TENSOR_ENC_FF1_LINEAR1_WEIGHT , GGML_OP_MUL_MAT },
362+ {PARAKEET_TENSOR_ENC_FF1_LINEAR2_WEIGHT , GGML_OP_MUL_MAT },
363+ {PARAKEET_TENSOR_ENC_NORM_CONV_WEIGHT , GGML_OP_MUL },
364+ {PARAKEET_TENSOR_ENC_NORM_CONV_BIAS , GGML_OP_ADD },
365+ {PARAKEET_TENSOR_ENC_CONV_PW1_WEIGHT , GGML_OP_IM2COL },
366+ {PARAKEET_TENSOR_ENC_CONV_DW_WEIGHT , GGML_OP_IM2COL },
367+ {PARAKEET_TENSOR_ENC_CONV_BN_WEIGHT , GGML_OP_MUL },
368+ {PARAKEET_TENSOR_ENC_CONV_BN_BIAS , GGML_OP_ADD },
369+ {PARAKEET_TENSOR_ENC_CONV_BN_MEAN , GGML_OP_SUB },
370+ {PARAKEET_TENSOR_ENC_CONV_BN_VAR , GGML_OP_DIV },
371+ {PARAKEET_TENSOR_ENC_CONV_BN_NUM_BATCHES , GGML_OP_NONE },
372+ {PARAKEET_TENSOR_ENC_CONV_PW2_WEIGHT , GGML_OP_IM2COL },
373+ {PARAKEET_TENSOR_ENC_NORM_ATTN_WEIGHT , GGML_OP_MUL },
374+ {PARAKEET_TENSOR_ENC_NORM_ATTN_BIAS , GGML_OP_ADD },
375+ {PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_U , GGML_OP_ADD },
376+ {PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_V , GGML_OP_ADD },
377+ {PARAKEET_TENSOR_ENC_ATTN_Q_WEIGHT , GGML_OP_MUL_MAT },
378+ {PARAKEET_TENSOR_ENC_ATTN_K_WEIGHT , GGML_OP_MUL_MAT },
379+ {PARAKEET_TENSOR_ENC_ATTN_V_WEIGHT , GGML_OP_MUL_MAT },
380+ {PARAKEET_TENSOR_ENC_ATTN_OUT_WEIGHT , GGML_OP_MUL_MAT },
381+ {PARAKEET_TENSOR_ENC_ATTN_POS_WEIGHT , GGML_OP_MUL_MAT },
382+ {PARAKEET_TENSOR_ENC_NORM_FF2_WEIGHT , GGML_OP_MUL },
383+ {PARAKEET_TENSOR_ENC_NORM_FF2_BIAS , GGML_OP_ADD },
384+ {PARAKEET_TENSOR_ENC_FF2_LINEAR1_WEIGHT , GGML_OP_MUL_MAT },
385+ {PARAKEET_TENSOR_ENC_FF2_LINEAR2_WEIGHT , GGML_OP_MUL_MAT },
386+ {PARAKEET_TENSOR_ENC_NORM_OUT_WEIGHT , GGML_OP_MUL },
387+ {PARAKEET_TENSOR_ENC_NORM_OUT_BIAS , GGML_OP_ADD },
388+
389+ // Decoder
390+ {PARAKEET_TENSOR_DEC_EMBED_WEIGHT , GGML_OP_GET_ROWS },
391+ {PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_IH , GGML_OP_MUL_MAT },
392+ {PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_HH , GGML_OP_MUL_MAT },
393+ {PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_IH , GGML_OP_ADD },
394+ {PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_HH , GGML_OP_ADD },
395+ {PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_IH , GGML_OP_MUL_MAT },
396+ {PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_HH , GGML_OP_MUL_MAT },
397+ {PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_IH , GGML_OP_ADD },
398+ {PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_HH , GGML_OP_ADD },
399+
400+ // Joint network
401+ {PARAKEET_TENSOR_JOINT_PRED_WEIGHT , GGML_OP_MUL_MAT },
402+ {PARAKEET_TENSOR_JOINT_PRED_BIAS , GGML_OP_ADD },
403+ {PARAKEET_TENSOR_JOINT_ENC_WEIGHT , GGML_OP_MUL_MAT },
404+ {PARAKEET_TENSOR_JOINT_ENC_BIAS , GGML_OP_ADD },
405+ {PARAKEET_TENSOR_JOINT_NET_WEIGHT , GGML_OP_MUL_MAT },
406+ {PARAKEET_TENSOR_JOINT_NET_BIAS , GGML_OP_ADD },
407+ };
0 commit comments