@@ -189,8 +189,9 @@ def py():
189189
190190 # V2.3: using ChunkedConformerEncoderV2, setting version=3.
191191 # Reduces chunk sizes and history if the input is not long enough (adapt_chunk_history_for_short_seqs=True by default).
192- # train_time_hours: 168.8 (v1: 215.6; adapt_chunk_history_...=False: 168.9)
193- # CTC-only: 9.46 (v1: 9.56; adapt_chunk_history_...=False: 9.45)
192+ # train_time_hours: 168.8 (v1: 215.6; adapt_chunk_history_...=False: 168.9; offline: 66.2)
193+ # CTC-only: 9.46 (v1: 9.56; adapt_chunk_history_...=False: 9.45; offline: 7.32)
194+ # CTC+LM: 7.22 (offline: 6.12)
194195 train (
195196 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3" ,
196197 {
@@ -232,6 +233,8 @@ def py():
232233 # Rope instead of relpos selfatt (ChunkedRotaryPosSelfAttentionV2).
233234 # (We don't really expect improvements in terms of WER; hopefully it stays mostly the same.
234235 # However, we can hope for better speed here, and maybe also lower memory consumption. Check that.)
236+ # train_time_hours: 237.1 (vs 168.8) (TODO: unexpectedly slower, investigate; rope impl slow?)
237+ # CTC-only: 9.31 (vs 9.46)
235238 train (
236239 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3-rope" ,
237240 {
@@ -259,6 +262,8 @@ def py():
259262 # (No right context in that setup.)
260263 # Here we use a slightly different, but similar, schedule,
261264 # and we also vary the lookahead.
265+ # train_time_hours: 103.9 (vs 168.8)
266+ # CTC-only: 9.66 (vs 9.46)
262267 train (
263268 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3-dyn" ,
264269 {
@@ -279,6 +284,8 @@ def py():
279284 )
280285
281286 # Dynamic chunking + rope.
287+ # train_time_hours: 128.4 (without rope: 103.9; not dynamic, without rope: 168.8) (TODO ??? rope impl slow?)
288+ # CTC-only: 9.55 (without rope: 9.66; not dynamic, without rope: 9.46)
282289 train (
283290 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3-dyn-rope" ,
284291 {
@@ -301,6 +308,8 @@ def py():
301308
302309 # Chunk-type embed (ctembed) (use_chunk_type_embedding=True)
303310 # Using with dynamic chunking + rope as base.
311+ # train_time_hours: 128.3 (vs 128.4)
312+ # CTC-only: 9.41 (vs 9.55; no dyn, no ctembed, just rope: 9.31)
304313 train (
305314 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3-dyn-rope-ctembed" ,
306315 {
@@ -322,7 +331,9 @@ def py():
322331 },
323332 )
324333
325- # Dyn-v2: Try to make it faster.
334+ # Dyn-v2 (dynV2): Try to make it faster.
335+ # train_time_hours: 100.1 (vs 128.3)
336+ # CTC-only: 11.0 (vs 9.41) (TODO: clearly worse than 9.41, investigate)
326337 train (
327338 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3-dynV2-rope-ctembed" ,
328339 {
@@ -345,6 +356,7 @@ def py():
345356 )
346357
347358 # Overlapping chunks (chunk_num_overlaps=2)
359+ # TODO (running...)
348360 train (
349361 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3-overlap" ,
350362 {
@@ -363,6 +375,7 @@ def py():
363375 )
364376
365377 # Dyn + rope + ctembed + overlap.
378+ # TODO (running...)
366379 train (
367380 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3-dyn-rope-ctembed-overlap" ,
368381 {
0 commit comments