Skip to content

Commit fba5e67

Browse files
authored
Fix CI tests. (#1974)
- Introduce unified AMP helpers (create_grad_scaler, torch_autocast) to handle deprecations in PyTorch ≥2.3.0
- Replace direct uses of torch.cuda.amp.GradScaler and torch.cuda.amp.autocast with the new utilities across all training and inference scripts
- Update all torch.load calls to include weights_only=False for compatibility with newer PyTorch versions
1 parent 71377d2 commit fba5e67

176 files changed

Lines changed: 881 additions & 501 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/aishell.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ concurrency:
1717

1818
jobs:
1919
generate_build_matrix:
20-
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'aishell')
20+
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
2121

2222
# see https://github.com/pytorch/pytorch/pull/50633
2323
runs-on: ubuntu-latest
@@ -31,8 +31,8 @@ jobs:
3131
id: set-matrix
3232
run: |
3333
# outputting for debugging purposes
34-
python ./.github/scripts/docker/generate_build_matrix.py
35-
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
34+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
35+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
3636
echo "::set-output name=matrix::${MATRIX}"
3737
aishell:
3838
needs: generate_build_matrix

.github/workflows/audioset.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ jobs:
3030
id: set-matrix
3131
run: |
3232
# outputting for debugging purposes
33-
python ./.github/scripts/docker/generate_build_matrix.py
34-
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
33+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
34+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
3535
echo "::set-output name=matrix::${MATRIX}"
3636
3737
audioset:
@@ -83,7 +83,7 @@ jobs:
8383
ls -lh ./model-onnx/*
8484
8585
- name: Upload model to huggingface
86-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
86+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
8787
env:
8888
HF_TOKEN: ${{ secrets.HF_TOKEN }}
8989
uses: nick-fields/retry@v3
@@ -116,7 +116,7 @@ jobs:
116116
rm -rf huggingface
117117
118118
- name: Prepare for release
119-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
119+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
120120
shell: bash
121121
run: |
122122
d=sherpa-onnx-zipformer-audio-tagging-2024-04-09
@@ -125,7 +125,7 @@ jobs:
125125
ls -lh
126126
127127
- name: Release exported onnx models
128-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
128+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
129129
uses: svenstaro/upload-release-action@v2
130130
with:
131131
file_glob: true

.github/workflows/baker_zh.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ jobs:
3131
id: set-matrix
3232
run: |
3333
# outputting for debugging purposes
34-
python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3"
35-
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3")
34+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
35+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
3636
echo "::set-output name=matrix::${MATRIX}"
3737
3838
baker_zh:
@@ -84,43 +84,43 @@ jobs:
8484
ls -lh
8585
8686
- uses: actions/upload-artifact@v4
87-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
87+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
8888
with:
8989
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
9090
path: ./*.wav
9191

9292
- uses: actions/upload-artifact@v4
93-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
93+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
9494
with:
9595
name: step-2
9696
path: ./model-steps-2.onnx
9797

9898
- uses: actions/upload-artifact@v4
99-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
99+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
100100
with:
101101
name: step-3
102102
path: ./model-steps-3.onnx
103103

104104
- uses: actions/upload-artifact@v4
105-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
105+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
106106
with:
107107
name: step-4
108108
path: ./model-steps-4.onnx
109109

110110
- uses: actions/upload-artifact@v4
111-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
111+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
112112
with:
113113
name: step-5
114114
path: ./model-steps-5.onnx
115115

116116
- uses: actions/upload-artifact@v4
117-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
117+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
118118
with:
119119
name: step-6
120120
path: ./model-steps-6.onnx
121121

122122
- name: Upload models to huggingface
123-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
123+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
124124
shell: bash
125125
env:
126126
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -141,7 +141,7 @@ jobs:
141141
popd
142142
143143
- name: Release exported onnx models
144-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
144+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
145145
uses: svenstaro/upload-release-action@v2
146146
with:
147147
file_glob: true

.github/workflows/librispeech.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@ jobs:
2929
id: set-matrix
3030
run: |
3131
# outputting for debugging purposes
32-
python ./.github/scripts/docker/generate_build_matrix.py
33-
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
32+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
33+
# MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
34+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.6.0")
3435
echo "::set-output name=matrix::${MATRIX}"
3536
librispeech:
3637
needs: generate_build_matrix

.github/workflows/ljspeech.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ jobs:
3030
id: set-matrix
3131
run: |
3232
# outputting for debugging purposes
33-
python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3"
34-
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3")
33+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
34+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
3535
echo "::set-output name=matrix::${MATRIX}"
3636
3737
ljspeech:
@@ -83,13 +83,13 @@ jobs:
8383
ls -lh
8484
8585
- uses: actions/upload-artifact@v4
86-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
86+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
8787
with:
8888
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
8989
path: ./*.wav
9090

9191
- name: Release exported onnx models
92-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
92+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
9393
uses: svenstaro/upload-release-action@v2
9494
with:
9595
file_glob: true
@@ -100,37 +100,37 @@ jobs:
100100
tag: tts-models
101101

102102
- uses: actions/upload-artifact@v4
103-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
103+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
104104
with:
105105
name: step-2
106106
path: ./model-steps-2.onnx
107107

108108
- uses: actions/upload-artifact@v4
109-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
109+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
110110
with:
111111
name: step-3
112112
path: ./model-steps-3.onnx
113113

114114
- uses: actions/upload-artifact@v4
115-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
115+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
116116
with:
117117
name: step-4
118118
path: ./model-steps-4.onnx
119119

120120
- uses: actions/upload-artifact@v4
121-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
121+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
122122
with:
123123
name: step-5
124124
path: ./model-steps-5.onnx
125125

126126
- uses: actions/upload-artifact@v4
127-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
127+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
128128
with:
129129
name: step-6
130130
path: ./model-steps-6.onnx
131131

132132
- name: Upload models to huggingface
133-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
133+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
134134
shell: bash
135135
env:
136136
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -155,7 +155,7 @@ jobs:
155155
popd
156156
157157
- name: Release exported onnx models
158-
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
158+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
159159
uses: svenstaro/upload-release-action@v2
160160
with:
161161
file_glob: true

.github/workflows/test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ jobs:
3030
id: set-matrix
3131
run: |
3232
# outputting for debugging purposes
33-
python ./.github/scripts/docker/generate_build_matrix.py
34-
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
33+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
34+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
3535
echo "::set-output name=matrix::${MATRIX}"
3636
test:
3737
needs: generate_build_matrix

.github/workflows/yesno.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ jobs:
3030
id: set-matrix
3131
run: |
3232
# outputting for debugging purposes
33-
python ./.github/scripts/docker/generate_build_matrix.py
34-
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
33+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
34+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
35+
# MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.5.0")
3536
echo "::set-output name=matrix::${MATRIX}"
3637
yesno:
3738
needs: generate_build_matrix

egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/train.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,13 @@
7979
from icefall.dist import cleanup_dist, setup_dist
8080
from icefall.env import get_env_info
8181
from icefall.lexicon import Lexicon
82-
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
82+
from icefall.utils import (
83+
AttributeDict,
84+
MetricsTracker,
85+
setup_logger,
86+
str2bool,
87+
torch_autocast,
88+
)
8389

8490
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
8591

@@ -638,7 +644,7 @@ def train_one_epoch(
638644
params.batch_idx_train += 1
639645
batch_size = len(batch["supervisions"]["text"])
640646

641-
with torch.cuda.amp.autocast(enabled=params.use_fp16):
647+
with torch_autocast(enabled=params.use_fp16):
642648
loss, loss_info = compute_loss(
643649
params=params,
644650
model=model,
@@ -912,7 +918,7 @@ def scan_pessimistic_batches_for_oom(
912918
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
913919
# (i.e. are not remembered by the decaying-average in adam), because
914920
# we want to avoid these params being subject to shrinkage in adam.
915-
with torch.cuda.amp.autocast(enabled=params.use_fp16):
921+
with torch_autocast(enabled=params.use_fp16):
916922
loss, _ = compute_loss(
917923
params=params,
918924
model=model,

egs/aishell/ASR/pruned_transducer_stateless2/train.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,13 @@
7272
from icefall.dist import cleanup_dist, setup_dist
7373
from icefall.env import get_env_info
7474
from icefall.lexicon import Lexicon
75-
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
75+
from icefall.utils import (
76+
AttributeDict,
77+
MetricsTracker,
78+
setup_logger,
79+
str2bool,
80+
torch_autocast,
81+
)
7682

7783
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
7884

@@ -688,7 +694,7 @@ def train_one_epoch(
688694
batch_size = len(batch["supervisions"]["text"])
689695

690696
try:
691-
with torch.cuda.amp.autocast(enabled=params.use_fp16):
697+
with torch_autocast(enabled=params.use_fp16):
692698
loss, loss_info = compute_loss(
693699
params=params,
694700
model=model,
@@ -989,7 +995,7 @@ def scan_pessimistic_batches_for_oom(
989995
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
990996
# (i.e. are not remembered by the decaying-average in adam), because
991997
# we want to avoid these params being subject to shrinkage in adam.
992-
with torch.cuda.amp.autocast(enabled=params.use_fp16):
998+
with torch_autocast(enabled=params.use_fp16):
993999
loss, _ = compute_loss(
9941000
params=params,
9951001
model=model,

egs/aishell/ASR/pruned_transducer_stateless3/model.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from encoder_interface import EncoderInterface
2424
from scaling import ScaledLinear
2525

26-
from icefall.utils import add_sos
26+
from icefall.utils import add_sos, torch_autocast
2727

2828

2929
class Transducer(nn.Module):
@@ -184,7 +184,7 @@ def forward(
184184
lm = simple_lm_proj(decoder_out)
185185
am = simple_am_proj(encoder_out)
186186

187-
with torch.cuda.amp.autocast(enabled=False):
187+
with torch_autocast(enabled=False):
188188
simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
189189
lm=lm.float(),
190190
am=am.float(),
@@ -219,7 +219,7 @@ def forward(
219219
# prior to do_rnnt_pruning (this is an optimization for speed).
220220
logits = joiner(am_pruned, lm_pruned, project_input=False)
221221

222-
with torch.cuda.amp.autocast(enabled=False):
222+
with torch_autocast(enabled=False):
223223
pruned_loss = k2.rnnt_loss_pruned(
224224
logits=logits.float(),
225225
symbols=y_padded,

0 commit comments

Comments
 (0)