@@ -168,6 +168,8 @@ def py():
168168
169169 # V2.3: using ChunkedConformerEncoderV2, setting version=3.
170170 # First experiment: try to reproduce the original.
171+ # train_time_hours: 168.9 (v1: 215.6)
172+ # CTC-only: 9.45 (v1: 9.56)
171173 train (
172174 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3-compat" ,
173175 {
@@ -186,7 +188,9 @@ def py():
186188 )
187189
188190 # V2.3: using ChunkedConformerEncoderV2, setting version=3.
189- # reduce chunk sizes, history, if the input is not long enough.
191+ # Reduce chunk sizes and history if the input is not long enough (adapt_chunk_history_for_short_seqs=True, the default).
192+ # train_time_hours: 168.8 (v1: 215.6; adapt_chunk_history_...=False: 168.9)
193+ # CTC-only: 9.46 (v1: 9.56; adapt_chunk_history_...=False: 9.45)
190194 train (
191195 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3" ,
192196 {
@@ -203,9 +207,11 @@ def py():
203207 },
204208 )
205209
206- # try grad checkpointing
210+ # Try grad checkpointing (mem_chunks_grad_checkpointing=True).
207211 # (In terms of WER, this should really be the same.
208212 # If this is better in terms of speed, and likewise for memory consumption, we could maybe just always enable it.)
213+ # train_time_hours: 201.1 (v1: 215.6; ..._checkpointing=False: 168.8) (but requires less memory)
214+ # CTC-only: 9.52 (..._checkpointing=False: 9.46)
209215 train (
210216 f"chunked-L{ left_n * center_size } -C{ center_size } -R{ right_size } -v2.3-gdckpt" ,
211217 {
0 commit comments