Skip to content

Commit 3a5570e

Browse files
committed
more
1 parent bd55cb7 commit 3a5570e

1 file changed

Lines changed: 16 additions & 3 deletions

File tree

users/zeyer/experiments/exp2025_10_21_chunked_ctc.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,9 @@ def py():
189189

190190
# V2.3: using ChunkedConformerEncoderV2, setting version=3.
191191
# Reduces chunk sizes and history if the input is not long enough (adapt_chunk_history_for_short_seqs=True by default).
192-
# train_time_hours: 168.8 (v1: 215.6; adapt_chunk_history_...=False: 168.9)
193-
# CTC-only: 9.46 (v1: 9.56; adapt_chunk_history_...=False: 9.45)
192+
# train_time_hours: 168.8 (v1: 215.6; adapt_chunk_history_...=False: 168.9; offline: 66.2)
193+
# CTC-only: 9.46 (v1: 9.56; adapt_chunk_history_...=False: 9.45; offline: 7.32)
194+
# CTC+LM: 7.22 (offline: 6.12)
194195
train(
195196
f"chunked-L{left_n * center_size}-C{center_size}-R{right_size}-v2.3",
196197
{
@@ -232,6 +233,8 @@ def py():
232233
# Rope instead of relpos selfatt (ChunkedRotaryPosSelfAttentionV2).
233234
# (We don't really expect improvements in terms of WER. Hopefully mostly the same.
234235
# However, we can hope to have better speed here, maybe also less memory consumption. Check that.)
236+
# train_time_hours: 237.1 (vs 168.8) (TODO: why slower? We hoped for better speed.)
237+
# CTC-only: 9.31 (vs 9.46)
235238
train(
236239
f"chunked-L{left_n * center_size}-C{center_size}-R{right_size}-v2.3-rope",
237240
{
@@ -259,6 +262,8 @@ def py():
259262
# (No right context in that setup.)
260263
# Here we use a slightly different, but similar, schedule.
261264
# But also varying the lookahead.
265+
# train_time_hours: 103.9 (vs 168.8)
266+
# CTC-only: 9.66 (vs 9.46)
262267
train(
263268
f"chunked-L{left_n * center_size}-C{center_size}-R{right_size}-v2.3-dyn",
264269
{
@@ -279,6 +284,8 @@ def py():
279284
)
280285

281286
# Dynamic chunking + rope.
287+
# train_time_hours: 128.4 (without rope: 103.9; not dynamic, without rope: 168.8) (TODO ??? rope impl slow?)
288+
# CTC-only: 9.55 (without rope: 9.66; not dynamic, without rope: 9.46)
282289
train(
283290
f"chunked-L{left_n * center_size}-C{center_size}-R{right_size}-v2.3-dyn-rope",
284291
{
@@ -301,6 +308,8 @@ def py():
301308

302309
# Chunk-type embed (ctembed) (use_chunk_type_embedding=True)
303310
# Using with dynamic chunking + rope as base.
311+
# train_time_hours: 128.3 (vs 128.4)
312+
# CTC-only: 9.41 (vs 9.55; no dyn, no ctembed, just rope: 9.31)
304313
train(
305314
f"chunked-L{left_n * center_size}-C{center_size}-R{right_size}-v2.3-dyn-rope-ctembed",
306315
{
@@ -322,7 +331,9 @@ def py():
322331
},
323332
)
324333

325-
# Dyn-v2: Try to make it faster.
334+
# Dyn-v2 (dynV2): Try to make it faster.
335+
# train_time_hours: 100.1 (vs 128.3)
336+
# CTC-only: 11.0 (vs 9.41) (TODO...)
326337
train(
327338
f"chunked-L{left_n * center_size}-C{center_size}-R{right_size}-v2.3-dynV2-rope-ctembed",
328339
{
@@ -345,6 +356,7 @@ def py():
345356
)
346357

347358
# Overlapping chunks (chunk_num_overlaps=2)
359+
# TODO (running...)
348360
train(
349361
f"chunked-L{left_n * center_size}-C{center_size}-R{right_size}-v2.3-overlap",
350362
{
@@ -363,6 +375,7 @@ def py():
363375
)
364376

365377
# Dyn + rope + ctembed + overlap.
378+
# TODO (running...)
366379
train(
367380
f"chunked-L{left_n * center_size}-C{center_size}-R{right_size}-v2.3-dyn-rope-ctembed-overlap",
368381
{

0 commit comments

Comments
 (0)