Skip to content

Commit ffc9c0d

Browse files
committed
parakeet : force loading of Parakeet model (wip)
This commit enables the Parakeet model to be loaded, but nothing more. This is done only to test model loading, to figure out which tensors are needed, and as a starting point for understanding this model and how similar/dissimilar it is to the Whisper model. This allows the test-parakeet test to pass.
1 parent e91291a commit ffc9c0d

2 files changed

Lines changed: 682 additions & 50 deletions

File tree

src/whisper-arch.h

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,3 +195,213 @@ static const std::map<vad_tensor, const char *> VAD_TENSOR_NAMES = {
195195
{VAD_TENSOR_FINAL_CONV_WEIGHT, "_model.decoder.decoder.2.weight"},
196196
{VAD_TENSOR_FINAL_CONV_BIAS, "_model.decoder.decoder.2.bias"}
197197
};
198+
199+
// Identifiers for the Parakeet model tensors declared in this header.
// Enumerators are grouped by model component: preprocessor, encoder
// pre_encode, per-layer encoder tensors, decoder and joint network.
// Each enumerator is a key in both PARAKEET_TENSOR_NAMES (name string) and
// PARAKEET_TENSOR_INFO (associated ggml_op).
enum parakeet_tensor {
200+
// Preprocessor
201+
PARAKEET_TENSOR_PREPROC_WINDOW,
202+
PARAKEET_TENSOR_PREPROC_FB,
203+
204+
// Encoder pre_encode
// (the numeric CONV_<N> suffixes match the "conv.<N>" indices used in the
// corresponding PARAKEET_TENSOR_NAMES entries)
205+
PARAKEET_TENSOR_ENC_PRE_OUT_WEIGHT,
206+
PARAKEET_TENSOR_ENC_PRE_OUT_BIAS,
207+
PARAKEET_TENSOR_ENC_PRE_CONV_0_WEIGHT,
208+
PARAKEET_TENSOR_ENC_PRE_CONV_0_BIAS,
209+
PARAKEET_TENSOR_ENC_PRE_CONV_2_WEIGHT,
210+
PARAKEET_TENSOR_ENC_PRE_CONV_2_BIAS,
211+
PARAKEET_TENSOR_ENC_PRE_CONV_3_WEIGHT,
212+
PARAKEET_TENSOR_ENC_PRE_CONV_3_BIAS,
213+
PARAKEET_TENSOR_ENC_PRE_CONV_5_WEIGHT,
214+
PARAKEET_TENSOR_ENC_PRE_CONV_5_BIAS,
215+
PARAKEET_TENSOR_ENC_PRE_CONV_6_WEIGHT,
216+
PARAKEET_TENSOR_ENC_PRE_CONV_6_BIAS,
217+
218+
// Encoder layers (per-layer)
// (one enumerator per tensor kind; the layer number is supplied separately
// through the "%d" placeholder in the tensor name)
219+
PARAKEET_TENSOR_ENC_NORM_FF1_WEIGHT,
220+
PARAKEET_TENSOR_ENC_NORM_FF1_BIAS,
221+
PARAKEET_TENSOR_ENC_FF1_LINEAR1_WEIGHT,
222+
PARAKEET_TENSOR_ENC_FF1_LINEAR2_WEIGHT,
223+
PARAKEET_TENSOR_ENC_NORM_CONV_WEIGHT,
224+
PARAKEET_TENSOR_ENC_NORM_CONV_BIAS,
225+
PARAKEET_TENSOR_ENC_CONV_PW1_WEIGHT,
226+
PARAKEET_TENSOR_ENC_CONV_DW_WEIGHT,
227+
PARAKEET_TENSOR_ENC_CONV_BN_WEIGHT,
228+
PARAKEET_TENSOR_ENC_CONV_BN_BIAS,
229+
PARAKEET_TENSOR_ENC_CONV_BN_MEAN,
230+
PARAKEET_TENSOR_ENC_CONV_BN_VAR,
231+
PARAKEET_TENSOR_ENC_CONV_BN_NUM_BATCHES,
232+
PARAKEET_TENSOR_ENC_CONV_PW2_WEIGHT,
233+
PARAKEET_TENSOR_ENC_NORM_ATTN_WEIGHT,
234+
PARAKEET_TENSOR_ENC_NORM_ATTN_BIAS,
235+
PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_U,
236+
PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_V,
237+
PARAKEET_TENSOR_ENC_ATTN_Q_WEIGHT,
238+
PARAKEET_TENSOR_ENC_ATTN_K_WEIGHT,
239+
PARAKEET_TENSOR_ENC_ATTN_V_WEIGHT,
240+
PARAKEET_TENSOR_ENC_ATTN_OUT_WEIGHT,
241+
PARAKEET_TENSOR_ENC_ATTN_POS_WEIGHT,
242+
PARAKEET_TENSOR_ENC_NORM_FF2_WEIGHT,
243+
PARAKEET_TENSOR_ENC_NORM_FF2_BIAS,
244+
PARAKEET_TENSOR_ENC_FF2_LINEAR1_WEIGHT,
245+
PARAKEET_TENSOR_ENC_FF2_LINEAR2_WEIGHT,
246+
PARAKEET_TENSOR_ENC_NORM_OUT_WEIGHT,
247+
PARAKEET_TENSOR_ENC_NORM_OUT_BIAS,
248+
249+
// Decoder
// (L0/L1 correspond to the "_l0"/"_l1" suffixes of the LSTM tensor names)
250+
PARAKEET_TENSOR_DEC_EMBED_WEIGHT,
251+
PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_IH,
252+
PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_HH,
253+
PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_IH,
254+
PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_HH,
255+
PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_IH,
256+
PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_HH,
257+
PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_IH,
258+
PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_HH,
259+
260+
// Joint network
261+
PARAKEET_TENSOR_JOINT_PRED_WEIGHT,
262+
PARAKEET_TENSOR_JOINT_PRED_BIAS,
263+
PARAKEET_TENSOR_JOINT_ENC_WEIGHT,
264+
PARAKEET_TENSOR_JOINT_ENC_BIAS,
265+
PARAKEET_TENSOR_JOINT_NET_WEIGHT,
266+
PARAKEET_TENSOR_JOINT_NET_BIAS,
267+
};
268+
269+
// Name string for each parakeet_tensor enumerator.
// Entries for per-layer encoder tensors contain a "%d" placeholder that stands
// for the layer number; all other entries are complete literal names.
// NOTE(review): presumably these strings match the tensor names stored in the
// converted model file -- confirm against the conversion script / loader.
static const std::map<parakeet_tensor, const char *> PARAKEET_TENSOR_NAMES = {
270+
// Preprocessor
271+
{PARAKEET_TENSOR_PREPROC_WINDOW, "preprocessor.featurizer.window"},
272+
{PARAKEET_TENSOR_PREPROC_FB, "preprocessor.featurizer.fb"},
273+
274+
// Encoder pre_encode
275+
{PARAKEET_TENSOR_ENC_PRE_OUT_WEIGHT, "encoder.pre_encode.out.weight"},
276+
{PARAKEET_TENSOR_ENC_PRE_OUT_BIAS, "encoder.pre_encode.out.bias"},
277+
{PARAKEET_TENSOR_ENC_PRE_CONV_0_WEIGHT, "encoder.pre_encode.conv.0.weight"},
278+
{PARAKEET_TENSOR_ENC_PRE_CONV_0_BIAS, "encoder.pre_encode.conv.0.bias"},
279+
{PARAKEET_TENSOR_ENC_PRE_CONV_2_WEIGHT, "encoder.pre_encode.conv.2.weight"},
280+
{PARAKEET_TENSOR_ENC_PRE_CONV_2_BIAS, "encoder.pre_encode.conv.2.bias"},
281+
{PARAKEET_TENSOR_ENC_PRE_CONV_3_WEIGHT, "encoder.pre_encode.conv.3.weight"},
282+
{PARAKEET_TENSOR_ENC_PRE_CONV_3_BIAS, "encoder.pre_encode.conv.3.bias"},
283+
{PARAKEET_TENSOR_ENC_PRE_CONV_5_WEIGHT, "encoder.pre_encode.conv.5.weight"},
284+
{PARAKEET_TENSOR_ENC_PRE_CONV_5_BIAS, "encoder.pre_encode.conv.5.bias"},
285+
{PARAKEET_TENSOR_ENC_PRE_CONV_6_WEIGHT, "encoder.pre_encode.conv.6.weight"},
286+
{PARAKEET_TENSOR_ENC_PRE_CONV_6_BIAS, "encoder.pre_encode.conv.6.bias"},
287+
288+
// Encoder layers (use %d for layer number)
289+
{PARAKEET_TENSOR_ENC_NORM_FF1_WEIGHT, "encoder.layers.%d.norm_feed_forward1.weight"},
290+
{PARAKEET_TENSOR_ENC_NORM_FF1_BIAS, "encoder.layers.%d.norm_feed_forward1.bias"},
291+
{PARAKEET_TENSOR_ENC_FF1_LINEAR1_WEIGHT, "encoder.layers.%d.feed_forward1.linear1.weight"},
292+
{PARAKEET_TENSOR_ENC_FF1_LINEAR2_WEIGHT, "encoder.layers.%d.feed_forward1.linear2.weight"},
293+
{PARAKEET_TENSOR_ENC_NORM_CONV_WEIGHT, "encoder.layers.%d.norm_conv.weight"},
294+
{PARAKEET_TENSOR_ENC_NORM_CONV_BIAS, "encoder.layers.%d.norm_conv.bias"},
295+
{PARAKEET_TENSOR_ENC_CONV_PW1_WEIGHT, "encoder.layers.%d.conv.pointwise_conv1.weight"},
296+
{PARAKEET_TENSOR_ENC_CONV_DW_WEIGHT, "encoder.layers.%d.conv.depthwise_conv.weight"},
297+
{PARAKEET_TENSOR_ENC_CONV_BN_WEIGHT, "encoder.layers.%d.conv.batch_norm.weight"},
298+
{PARAKEET_TENSOR_ENC_CONV_BN_BIAS, "encoder.layers.%d.conv.batch_norm.bias"},
299+
{PARAKEET_TENSOR_ENC_CONV_BN_MEAN, "encoder.layers.%d.conv.batch_norm.running_mean"},
300+
{PARAKEET_TENSOR_ENC_CONV_BN_VAR, "encoder.layers.%d.conv.batch_norm.running_var"},
301+
{PARAKEET_TENSOR_ENC_CONV_BN_NUM_BATCHES, "encoder.layers.%d.conv.batch_norm.num_batches_tracked"},
302+
{PARAKEET_TENSOR_ENC_CONV_PW2_WEIGHT, "encoder.layers.%d.conv.pointwise_conv2.weight"},
303+
{PARAKEET_TENSOR_ENC_NORM_ATTN_WEIGHT, "encoder.layers.%d.norm_self_att.weight"},
304+
{PARAKEET_TENSOR_ENC_NORM_ATTN_BIAS, "encoder.layers.%d.norm_self_att.bias"},
305+
{PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_U, "encoder.layers.%d.self_attn.pos_bias_u"},
306+
{PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_V, "encoder.layers.%d.self_attn.pos_bias_v"},
307+
{PARAKEET_TENSOR_ENC_ATTN_Q_WEIGHT, "encoder.layers.%d.self_attn.linear_q.weight"},
308+
{PARAKEET_TENSOR_ENC_ATTN_K_WEIGHT, "encoder.layers.%d.self_attn.linear_k.weight"},
309+
{PARAKEET_TENSOR_ENC_ATTN_V_WEIGHT, "encoder.layers.%d.self_attn.linear_v.weight"},
310+
{PARAKEET_TENSOR_ENC_ATTN_OUT_WEIGHT, "encoder.layers.%d.self_attn.linear_out.weight"},
311+
{PARAKEET_TENSOR_ENC_ATTN_POS_WEIGHT, "encoder.layers.%d.self_attn.linear_pos.weight"},
312+
{PARAKEET_TENSOR_ENC_NORM_FF2_WEIGHT, "encoder.layers.%d.norm_feed_forward2.weight"},
313+
{PARAKEET_TENSOR_ENC_NORM_FF2_BIAS, "encoder.layers.%d.norm_feed_forward2.bias"},
314+
{PARAKEET_TENSOR_ENC_FF2_LINEAR1_WEIGHT, "encoder.layers.%d.feed_forward2.linear1.weight"},
315+
{PARAKEET_TENSOR_ENC_FF2_LINEAR2_WEIGHT, "encoder.layers.%d.feed_forward2.linear2.weight"},
316+
{PARAKEET_TENSOR_ENC_NORM_OUT_WEIGHT, "encoder.layers.%d.norm_out.weight"},
317+
{PARAKEET_TENSOR_ENC_NORM_OUT_BIAS, "encoder.layers.%d.norm_out.bias"},
318+
319+
// Decoder
320+
{PARAKEET_TENSOR_DEC_EMBED_WEIGHT, "decoder.prediction.embed.weight"},
321+
{PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_IH, "decoder.prediction.dec_rnn.lstm.weight_ih_l0"},
322+
{PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_HH, "decoder.prediction.dec_rnn.lstm.weight_hh_l0"},
323+
{PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_IH, "decoder.prediction.dec_rnn.lstm.bias_ih_l0"},
324+
{PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_HH, "decoder.prediction.dec_rnn.lstm.bias_hh_l0"},
325+
{PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_IH, "decoder.prediction.dec_rnn.lstm.weight_ih_l1"},
326+
{PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_HH, "decoder.prediction.dec_rnn.lstm.weight_hh_l1"},
327+
{PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_IH, "decoder.prediction.dec_rnn.lstm.bias_ih_l1"},
328+
{PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_HH, "decoder.prediction.dec_rnn.lstm.bias_hh_l1"},
329+
330+
// Joint network
331+
{PARAKEET_TENSOR_JOINT_PRED_WEIGHT, "joint.pred.weight"},
332+
{PARAKEET_TENSOR_JOINT_PRED_BIAS, "joint.pred.bias"},
333+
{PARAKEET_TENSOR_JOINT_ENC_WEIGHT, "joint.enc.weight"},
334+
{PARAKEET_TENSOR_JOINT_ENC_BIAS, "joint.enc.bias"},
335+
{PARAKEET_TENSOR_JOINT_NET_WEIGHT, "joint.joint_net.2.weight"},
336+
{PARAKEET_TENSOR_JOINT_NET_BIAS, "joint.joint_net.2.bias"},
337+
};
338+
339+
// ggml_op associated with each parakeet_tensor enumerator.
// NOTE(review): presumably this is the operation each tensor is consumed by
// when the graph is built; how the loader uses this table is not visible
// here -- confirm before relying on it.
static const std::map<parakeet_tensor, ggml_op> PARAKEET_TENSOR_INFO = {
340+
// Preprocessor
341+
{PARAKEET_TENSOR_PREPROC_WINDOW, GGML_OP_MUL},
342+
{PARAKEET_TENSOR_PREPROC_FB, GGML_OP_MUL_MAT},
343+
344+
// Encoder pre_encode
345+
{PARAKEET_TENSOR_ENC_PRE_OUT_WEIGHT, GGML_OP_MUL_MAT},
346+
{PARAKEET_TENSOR_ENC_PRE_OUT_BIAS, GGML_OP_ADD},
347+
{PARAKEET_TENSOR_ENC_PRE_CONV_0_WEIGHT, GGML_OP_IM2COL},
348+
{PARAKEET_TENSOR_ENC_PRE_CONV_0_BIAS, GGML_OP_ADD},
349+
{PARAKEET_TENSOR_ENC_PRE_CONV_2_WEIGHT, GGML_OP_IM2COL},
350+
{PARAKEET_TENSOR_ENC_PRE_CONV_2_BIAS, GGML_OP_ADD},
351+
{PARAKEET_TENSOR_ENC_PRE_CONV_3_WEIGHT, GGML_OP_IM2COL},
352+
{PARAKEET_TENSOR_ENC_PRE_CONV_3_BIAS, GGML_OP_ADD},
353+
{PARAKEET_TENSOR_ENC_PRE_CONV_5_WEIGHT, GGML_OP_IM2COL},
354+
{PARAKEET_TENSOR_ENC_PRE_CONV_5_BIAS, GGML_OP_ADD},
355+
{PARAKEET_TENSOR_ENC_PRE_CONV_6_WEIGHT, GGML_OP_IM2COL},
356+
{PARAKEET_TENSOR_ENC_PRE_CONV_6_BIAS, GGML_OP_ADD},
357+
358+
// Encoder layers
359+
{PARAKEET_TENSOR_ENC_NORM_FF1_WEIGHT, GGML_OP_MUL},
360+
{PARAKEET_TENSOR_ENC_NORM_FF1_BIAS, GGML_OP_ADD},
361+
{PARAKEET_TENSOR_ENC_FF1_LINEAR1_WEIGHT, GGML_OP_MUL_MAT},
362+
{PARAKEET_TENSOR_ENC_FF1_LINEAR2_WEIGHT, GGML_OP_MUL_MAT},
363+
{PARAKEET_TENSOR_ENC_NORM_CONV_WEIGHT, GGML_OP_MUL},
364+
{PARAKEET_TENSOR_ENC_NORM_CONV_BIAS, GGML_OP_ADD},
365+
{PARAKEET_TENSOR_ENC_CONV_PW1_WEIGHT, GGML_OP_IM2COL},
366+
{PARAKEET_TENSOR_ENC_CONV_DW_WEIGHT, GGML_OP_IM2COL},
367+
{PARAKEET_TENSOR_ENC_CONV_BN_WEIGHT, GGML_OP_MUL},
368+
{PARAKEET_TENSOR_ENC_CONV_BN_BIAS, GGML_OP_ADD},
369+
{PARAKEET_TENSOR_ENC_CONV_BN_MEAN, GGML_OP_SUB},
370+
{PARAKEET_TENSOR_ENC_CONV_BN_VAR, GGML_OP_DIV},
371+
// num_batches_tracked is the only entry mapped to GGML_OP_NONE.
{PARAKEET_TENSOR_ENC_CONV_BN_NUM_BATCHES, GGML_OP_NONE},
372+
{PARAKEET_TENSOR_ENC_CONV_PW2_WEIGHT, GGML_OP_IM2COL},
373+
{PARAKEET_TENSOR_ENC_NORM_ATTN_WEIGHT, GGML_OP_MUL},
374+
{PARAKEET_TENSOR_ENC_NORM_ATTN_BIAS, GGML_OP_ADD},
375+
{PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_U, GGML_OP_ADD},
376+
{PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_V, GGML_OP_ADD},
377+
{PARAKEET_TENSOR_ENC_ATTN_Q_WEIGHT, GGML_OP_MUL_MAT},
378+
{PARAKEET_TENSOR_ENC_ATTN_K_WEIGHT, GGML_OP_MUL_MAT},
379+
{PARAKEET_TENSOR_ENC_ATTN_V_WEIGHT, GGML_OP_MUL_MAT},
380+
{PARAKEET_TENSOR_ENC_ATTN_OUT_WEIGHT, GGML_OP_MUL_MAT},
381+
{PARAKEET_TENSOR_ENC_ATTN_POS_WEIGHT, GGML_OP_MUL_MAT},
382+
{PARAKEET_TENSOR_ENC_NORM_FF2_WEIGHT, GGML_OP_MUL},
383+
{PARAKEET_TENSOR_ENC_NORM_FF2_BIAS, GGML_OP_ADD},
384+
{PARAKEET_TENSOR_ENC_FF2_LINEAR1_WEIGHT, GGML_OP_MUL_MAT},
385+
{PARAKEET_TENSOR_ENC_FF2_LINEAR2_WEIGHT, GGML_OP_MUL_MAT},
386+
{PARAKEET_TENSOR_ENC_NORM_OUT_WEIGHT, GGML_OP_MUL},
387+
{PARAKEET_TENSOR_ENC_NORM_OUT_BIAS, GGML_OP_ADD},
388+
389+
// Decoder
390+
{PARAKEET_TENSOR_DEC_EMBED_WEIGHT, GGML_OP_GET_ROWS},
391+
{PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_IH, GGML_OP_MUL_MAT},
392+
{PARAKEET_TENSOR_DEC_LSTM_L0_WEIGHT_HH, GGML_OP_MUL_MAT},
393+
{PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_IH, GGML_OP_ADD},
394+
{PARAKEET_TENSOR_DEC_LSTM_L0_BIAS_HH, GGML_OP_ADD},
395+
{PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_IH, GGML_OP_MUL_MAT},
396+
{PARAKEET_TENSOR_DEC_LSTM_L1_WEIGHT_HH, GGML_OP_MUL_MAT},
397+
{PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_IH, GGML_OP_ADD},
398+
{PARAKEET_TENSOR_DEC_LSTM_L1_BIAS_HH, GGML_OP_ADD},
399+
400+
// Joint network
401+
{PARAKEET_TENSOR_JOINT_PRED_WEIGHT, GGML_OP_MUL_MAT},
402+
{PARAKEET_TENSOR_JOINT_PRED_BIAS, GGML_OP_ADD},
403+
{PARAKEET_TENSOR_JOINT_ENC_WEIGHT, GGML_OP_MUL_MAT},
404+
{PARAKEET_TENSOR_JOINT_ENC_BIAS, GGML_OP_ADD},
405+
{PARAKEET_TENSOR_JOINT_NET_WEIGHT, GGML_OP_MUL_MAT},
406+
{PARAKEET_TENSOR_JOINT_NET_BIAS, GGML_OP_ADD},
407+
};

0 commit comments

Comments
 (0)