Skip to content

Commit b7b54d2

Browse files
committed
Use GitHub Actions CI
1 parent 4824d68 commit b7b54d2

1 file changed

Lines changed: 334 additions & 0 deletions

File tree

.github/workflows/ci.yml

Lines changed: 334 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,334 @@
1+
# Continuous-integration workflow for NeMo Text Processing.
name: "NeMo Text Processing CI"

# Triggers:
#   - pushes to main and to staging branches (staging/** or staging_*)
#   - pull requests that target main
#   - manual runs (workflow_dispatch)
on:
  push:
    branches:
      - main
      - "staging/**"
      - "staging_*"
  pull_request:
    branches:
      - main
  workflow_dispatch:  # Allow manual trigger

# Environment shared by every job in this workflow.
env:
  # Set to the empty string; presumably keeps every job CPU-only
  # (the test jobs also pass --cpu to pytest) -- confirm.
  CUDA_VISIBLE_DEVICES: ""
  # Root directory for per-language grammar caches; jobs append
  # "/<lang>_grammars" to it.
  CACHE_BASE: "~/.cache/nemo_text_processing"
13+
14+
jobs:
15+
# ========== Stage: PyTorch version & NeMo Installation ==========

  # Installs the package and its requirements on a fresh runner and checks
  # that `nemo_text_processing` can be imported. Other jobs list this job in
  # `needs`, so they only start once it has succeeded.
  setup-and-install:
    name: 'Setup: Python & Install NeMo'
    runs-on: ubuntu-latest
    timeout-minutes: 15

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"

      # pip download cache, keyed on both requirement files; falls back to
      # the most recent cache for this OS when the hash does not match.
      - name: Cache pip packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/requirements.txt', 'requirements/requirements_test.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      # Best effort: neither command can fail the step (`|| true`, `|| echo`).
      - name: PyTorch version
        run: |
          pip install torch torchvision || true
          python -c "import torch; print('PyTorch version:', torch.__version__)" || echo "PyTorch not required"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install -r requirements/requirements.txt
          pip install -r requirements/requirements_test.txt

      # Editable install of the checked-out repository with the "all" extra.
      - name: NeMo Installation (reinstall.sh release equivalent)
        timeout-minutes: 10
        run: pip install --editable ".[all]"

      # Fails the job if the package cannot be imported.
      - name: Verify installation
        run: |
          python -c "import nemo_text_processing; print('✓ Package installed successfully')"
57+
58+
# ========== L0: Create Grammars (Parallel) ==========

  # Builds the TN and/or ITN grammars for each language in the matrix by
  # running the normalizer once on a short sample text with --cache_dir set,
  # and stores the resulting directory with actions/cache.
  #
  # Grammar creation is best effort: each build step uses
  # `continue-on-error: true` and ends in `|| echo ...`, so a failed or
  # timed-out build does not fail the job.
  l0-create-grammars:
    name: 'L0: Create Grammars'
    needs: setup-and-install
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        # One entry per language. An empty tn_text / itn_text skips the
        # corresponding build step (see the `if:` conditions below).
        language:
          - name: English
            code: en
            tn_text: "1"
            itn_text: "twenty"
          - name: Vietnamese
            code: vi
            tn_text: ""
            itn_text: "một ngàn "
          - name: German
            code: de
            tn_text: "1"
            itn_text: "ein hundert "
          - name: Spanish
            code: es
            tn_text: "1"
            itn_text: "ciento uno "
          - name: French
            code: fr
            tn_text: "2"
            itn_text: "cent "
          - name: Arabic
            code: ar
            tn_text: "2"
            itn_text: "اثنان "
          - name: Hindi
            code: hi
            tn_text: "१"
            itn_text: "एक "
          - name: Japanese
            code: ja
            tn_text: ""
            itn_text: "100"
          - name: Chinese
            code: zh
            tn_text: "6"
            itn_text: "你"
          - name: Hungarian
            code: hu
            tn_text: "100"
            itn_text: ""
          - name: Italian
            code: it
            tn_text: "122"
            itn_text: ""
          - name: Portuguese
            code: pt
            tn_text: ""
            itn_text: "dez "
          - name: Russian
            code: ru
            tn_text: "03"
            itn_text: "три "
          - name: Swedish
            code: sv
            tn_text: "100"
            itn_text: ""
          - name: Armenian
            code: hy
            tn_text: "6"
            itn_text: "վեց "
          - name: Marathi
            code: mr
            tn_text: ""
            itn_text: "शून्य "

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Cache pip packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/requirements.txt') }}

      # Per-language grammar cache, keyed on the hash of that language's
      # Python sources; falls back to the latest cache for the language.
      - name: "Cache ${{ matrix.language.name }} grammars"
        id: cache-grammars
        uses: actions/cache@v4
        with:
          path: ${{ env.CACHE_BASE }}/${{ matrix.language.code }}_grammars
          key: ${{ runner.os }}-${{ matrix.language.code }}-grammars-${{ hashFiles(format('nemo_text_processing/**/{0}/**/*.py', matrix.language.code)) }}
          restore-keys: |
            ${{ runner.os }}-${{ matrix.language.code }}-grammars-

      - name: Install dependencies
        run: |
          pip install --upgrade pip setuptools wheel
          pip install -r requirements/requirements.txt
          pip install --editable ".[all]"

      # Text normalization grammars; skipped when no TN sample text is set.
      # `timeout 600` caps the command at 10 minutes, inside the 15-minute
      # step limit.
      - name: "L0 - Create ${{ matrix.language.name }} TN grammars"
        if: ${{ matrix.language.tn_text != '' }}
        continue-on-error: true
        timeout-minutes: 15
        run: |
          echo "Creating TN grammars for ${{ matrix.language.name }}..."
          timeout 600 python nemo_text_processing/text_normalization/normalize.py \
            --lang=${{ matrix.language.code }} \
            --text="${{ matrix.language.tn_text }}" \
            --cache_dir ${{ env.CACHE_BASE }}/${{ matrix.language.code }}_grammars \
            || echo "⚠️ TN grammar creation timed out or failed (will retry in tests)"

      # Inverse text normalization grammars; skipped when no ITN sample text
      # is set.
      - name: "L0 - Create ${{ matrix.language.name }} ITN grammars"
        if: ${{ matrix.language.itn_text != '' }}
        continue-on-error: true
        timeout-minutes: 15
        run: |
          echo "Creating ITN grammars for ${{ matrix.language.name }}..."
          timeout 600 python nemo_text_processing/inverse_text_normalization/inverse_normalize.py \
            --lang=${{ matrix.language.code }} \
            --text="${{ matrix.language.itn_text }}" \
            --cache_dir ${{ env.CACHE_BASE }}/${{ matrix.language.code }}_grammars \
            || echo "⚠️ ITN grammar creation timed out or failed (will retry in tests)"
138+
139+
# ========== L0: Test Utils ==========

  # Runs the audio-based utility tests on CPU once setup-and-install has
  # succeeded.
  l0-test-utils:
    name: "L0: Test utils"
    needs: setup-and-install
    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      # This job installs both requirement files, so the key hashes both.
      # That is also the key the setup-and-install job saves under, which
      # lets this job reuse that cache; restore-keys falls back to the most
      # recent pip cache for this OS when the hash does not match.
      - name: Cache pip packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/requirements.txt', 'requirements/requirements_test.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          pip install --upgrade pip setuptools wheel
          pip install -r requirements/requirements.txt
          pip install -r requirements/requirements_test.txt
          pip install --editable ".[all]"

      # --cpu is a project-specific pytest option (presumably defined in the
      # repository's conftest -- confirm).
      - name: L0 - Test audio based utils
        run: |
          pytest tests/nemo_text_processing/audio_based_utils/ --cpu -v
171+
172+
# ========== L1: TN/ITN Tests (Sequential per language) ==========

  # Runs the TN/ITN test suite of each language in the matrix, at most three
  # languages at a time, reusing the grammar cache written by
  # l0-create-grammars. A failing test run fails the job; a language without
  # a test directory only prints a warning.
  l1-test-languages:
    name: "L1: Test ${{ matrix.language.name }}"
    needs: l0-create-grammars
    runs-on: ubuntu-latest
    timeout-minutes: 45
    strategy:
      fail-fast: false
      max-parallel: 3  # Limit parallel to avoid resource issues
      matrix:
        language:
          - { code: en, name: "English", enabled: true }
          - { code: vi, name: "Vietnamese", enabled: true }
          - { code: de, name: "German", enabled: true }
          - { code: es, name: "Spanish", enabled: true }
          - { code: fr, name: "French", enabled: true }
          - { code: ar, name: "Arabic", enabled: true }
          - { code: hi, name: "Hindi", enabled: true }
          - { code: ja, name: "Japanese", enabled: true }
          - { code: zh, name: "Chinese", enabled: true }
          - { code: pt, name: "Portuguese", enabled: true }
          - { code: ru, name: "Russian", enabled: true }
          - { code: hy, name: "Armenian", enabled: true }
          - { code: mr, name: "Marathi", enabled: true }
        # Intended to drop entries whose `enabled` flag is false; every entry
        # is currently enabled. NOTE(review): confirm that `exclude` matches
        # partially on nested object values before relying on this.
        exclude:
          - language: { enabled: false }

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      # This job installs both requirement files, so the key hashes both.
      # That is also the key the setup-and-install job saves under;
      # restore-keys falls back to the most recent pip cache for this OS.
      - name: Cache pip packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/requirements.txt', 'requirements/requirements_test.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      # Same path and key as the grammar cache step in l0-create-grammars.
      - name: Restore ${{ matrix.language.name }} grammars cache
        uses: actions/cache@v4
        with:
          path: ${{ env.CACHE_BASE }}/${{ matrix.language.code }}_grammars
          key: ${{ runner.os }}-${{ matrix.language.code }}-grammars-${{ hashFiles(format('nemo_text_processing/**/{0}/**/*.py', matrix.language.code)) }}
          restore-keys: |
            ${{ runner.os }}-${{ matrix.language.code }}-grammars-

      - name: Install dependencies
        run: |
          pip install --upgrade pip setuptools wheel
          pip install -r requirements/requirements.txt
          pip install -r requirements/requirements_test.txt
          pip install --editable ".[all]"

      # --cpu and --tn_cache_dir are project-specific pytest options
      # (presumably defined in the repository's conftest -- confirm).
      # --junitxml is pytest's built-in report writer; it gives the upload
      # step below a concrete result file to collect.
      - name: L1 - Run all ${{ matrix.language.name }} TN/ITN tests
        timeout-minutes: 40
        continue-on-error: false
        run: |
          if [ -d "tests/nemo_text_processing/${{ matrix.language.code }}" ]; then
            echo "Running tests for ${{ matrix.language.name }}..."
            pytest tests/nemo_text_processing/${{ matrix.language.code }}/ \
              -m "not pleasefixme" \
              --cpu \
              --tn_cache_dir ${{ env.CACHE_BASE }}/${{ matrix.language.code }}_grammars \
              -v \
              --tb=short \
              --junitxml=test-results-${{ matrix.language.code }}.xml \
              || (echo "❌ Tests failed for ${{ matrix.language.name }}" && exit 1)
          else
            echo "⚠️ No tests found for ${{ matrix.language.code }}"
          fi

      # Runs even when the tests failed, so results are always available.
      - name: Upload test results for ${{ matrix.language.name }}
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-results-${{ matrix.language.code }}
          # result.log is kept from the original path list; no step in this
          # job visibly creates it, so it is uploaded only if the test run
          # itself writes it.
          path: |
            .pytest_cache/
            result.log
            test-results-${{ matrix.language.code }}.xml
          # upload-artifact v4 skips hidden files and directories unless this
          # is set, which would silently drop .pytest_cache/.
          include-hidden-files: true
          retention-days: 7
255+
256+
# ========== L2: Export Grammars (Optional - only on main) ==========

  # Exports the grammars listed in the matrix with pynini_export.py and
  # uploads each export directory as an artifact. Runs only for the main
  # branch and only after the language tests have succeeded.
  l2-export-grammars:
    name: "L2: Export Grammars"
    needs: l1-test-languages
    if: github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        export:
          - { lang: en, grammars: itn_grammars, name: "English ITN" }
          - { lang: vi, grammars: itn_grammars, name: "Vietnamese ITN" }

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Cache pip packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/requirements.txt') }}

      - name: Install dependencies
        run: |
          pip install --upgrade pip setuptools wheel
          pip install -r requirements/requirements.txt
          pip install --editable ".[all]"

      - name: L2 - Export ${{ matrix.export.name }} grammars
        timeout-minutes: 20
        run: |
          cd tools/text_processing_deployment
          # CACHE_BASE begins with "~". Bash only expands a tilde at the start
          # of a word, so "--cache_dir=~/..." would reach the script as a
          # literal "~" path relative to this directory. Replace the leading
          # tilde with $HOME explicitly.
          cache_root="${CACHE_BASE/#\~/$HOME}"
          python pynini_export.py \
            --output_dir=./exported_${{ matrix.export.lang }} \
            --grammars=${{ matrix.export.grammars }} \
            --language=${{ matrix.export.lang }} \
            --cache_dir="${cache_root}/${{ matrix.export.lang }}_grammars"
          ls -R ./exported_${{ matrix.export.lang }}

      # Artifact name combines language and grammar type so matrix entries
      # do not collide.
      - name: Upload exported grammars
        uses: actions/upload-artifact@v4
        with:
          name: exported-grammars-${{ matrix.export.lang }}-${{ matrix.export.grammars }}
          path: tools/text_processing_deployment/exported_${{ matrix.export.lang }}
          retention-days: 30
308+
309+
# ========== Final Summary ==========

  # Prints the result of the setup, utils and language-test jobs and fails
  # unless all three succeeded. `if: always()` makes it run even when an
  # upstream job failed or was skipped.
  test-summary:
    name: "Test Summary"
    # Every job whose result is read below must be listed here; the `needs`
    # context only exposes the jobs named in `needs`.
    needs: [setup-and-install, l0-test-utils, l1-test-languages]
    if: always()
    runs-on: ubuntu-latest
    timeout-minutes: 5

    steps:
      - name: Check test results
        # Results are passed through the environment rather than expanded
        # inline in the script.
        env:
          SETUP_RESULT: ${{ needs.setup-and-install.result }}
          UTILS_RESULT: ${{ needs.l0-test-utils.result }}
          LANGUAGE_RESULT: ${{ needs.l1-test-languages.result }}
        run: |
          status=0

          # report <label> <result>: print one summary line and remember
          # whether any job ended in something other than "success".
          report() {
            if [ "$2" = "success" ]; then
              echo "✓ $1: $2"
            else
              echo "✗ $1: $2"
              status=1
            fi
          }

          echo "=========================================="
          echo "CI Pipeline Summary"
          echo "=========================================="
          report "Setup" "${SETUP_RESULT}"
          report "Utils" "${UTILS_RESULT}"
          report "Language Tests" "${LANGUAGE_RESULT}"
          echo "=========================================="

          if [ "${status}" -ne 0 ]; then
            echo "❌ Some jobs did not succeed"
            exit 1
          fi

          echo "✅ All tests passed!"

0 commit comments

Comments
 (0)