Skip to content

Commit 2c563ad

Browse files
authored
Merge pull request #419 from Vincent777/feat/ci
feat: CI module integration
2 parents f4c8b29 + 79259ba commit 2c563ad

3 files changed

Lines changed: 410 additions & 0 deletions

File tree

.github/ci_config.yaml

Lines changed: 355 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,355 @@
1+
# Repository and branch that the CI service tests.
# NOTE(review): confirm this URL is the intended repository — the reusable
# workflow that consumes this file lives under a different owner.
repo:
  url: 'https://github.com/Vincent777/InfiniLM.git'
  branch: 'main'

# Prefix for the status contexts reported back to GitHub
# (presumably one context per platform/job — verify against the CI service).
github:
  status_context_prefix: 'ci/infinilm'
7+
8+
platforms:
9+
# NVIDIA platform: image build inputs, container launch flags and test jobs.
# NOTE(review): nesting was reconstructed from a flattened view of this file —
# confirm that `timeout` belongs under `resources` and `env` at job level.
nvidia:
  image:
    dockerfile: images/nvidia/
    build_args:
      BASE_IMAGE: 'nvcr.io/nvidia/pytorch:25.12-py3'
      CUDA_ARCH: 'sm_80,sm_86,sm_89,sm_90'
      APT_MIRROR: 'https://mirrors.tuna.tsinghua.edu.cn/ubuntu'
      PIP_INDEX_URL: 'https://pypi.org/simple'
      InfiniCore_BRANCH: main
  # The container runs as root, privileged, and shares the host network,
  # PID and IPC namespaces.
  docker_args:
    - '--user=root'
    - '--network=host'
    - '--privileged'
    - '--cap-add=ALL'
    - '--pid=host'
    - '--ipc=host'
    - '--workdir=/workspace'
  # host_path:container_path bind mounts. `__WORKSPACE__` looks like a
  # placeholder filled in by the CI service (TODO confirm).
  volumes:
    - '/data:/data'
    - '/data-aisoft:/data-aisoft'
    - '/data-aisoft/artifacts/CI_nvidia_test/__WORKSPACE__:/artifacts'
  setup: 'pip install .[dev] --no-build-isolation'
  jobs:
    # Inference smoke test; `ngpus` lists two GPU counts (1 and 4).
    gpu_inferencetest:
      type: inferencetest
      resources:
        ngpus: [1, 4]
        gpu_style: nvidia
        shm_size: 64g
        timeout: 3600
      stages:
        - name: test
          run: >-
            python InfiniLM/examples/test_infer.py
            --device nvidia
            --model=/data-aisoft/mechdancer/models/9g_8b_thinking/

    # Benchmark; each TEST_PARAM entry replaces `<TEST_PARAM>` in `run`
    # (presumably one run per entry, `default` meaning no extra flags —
    # confirm with the CI service).
    gpu_benchtest:
      type: benchtest
      resources:
        gpu_style: nvidia
        shm_size: 64g
        timeout: 3600
      env:
        TEST_PARAM:
          - 'default'
          - '--enable-paged-attn'
          - '--enable-paged-attn --enable-graph'
          - '--enable-paged-attn --enable-graph --attn=flash-attn'
      stages:
        - name: test
          run: >-
            python InfiniLM/examples/bench.py
            --device nvidia
            --model=/data-aisoft/mechdancer/models/9g_8b_thinking/
            --input-len=256,1024
            --output-len=256,1024
            --batch-size=8
            <TEST_PARAM>

    # Accuracy benchmarks. `--bench` is supplied only through TEST_PARAM: the
    # command previously also hard-coded `--bench mmlu`, which duplicated the
    # flag for the mmlu entry and contradicted it for the ceval entry.
    gpu_accuracytest:
      type: accuracytest
      resources:
        gpu_style: nvidia
        shm_size: 64g
        timeout: 3600
      env:
        TEST_PARAM:
          - '--bench mmlu'
          - '--bench ceval --enable-paged-attn --enable-graph'
      stages:
        - name: test
          run: >-
            python InfiniLM/test/bench/test_benchmark.py
            --device nvidia
            --model /data-aisoft/mechdancer/models/9g_8b_thinking/
            --backend cpp
            --max-new-tokens 5
            --cache-dir /data-aisoft/pepe/datasets/
            --split=val
            <TEST_PARAM>

    # Service performance test. `run` has no `<TEST_PARAM>` placeholder, so the
    # env values are presumably read by the script or the CI service itself
    # (TODO confirm).
    gpu_servicetest:
      type: servicetest
      resources:
        # `gpu_style` and `timeout` were missing here only; they now match the
        # other jobs of this platform.
        gpu_style: nvidia
        shm_size: 64g
        timeout: 3600
      env:
        MODEL_LIST: '9g_8b_thinking'
        ENGINE: InfiniLM
        TEST_PARAM:
          - 'default'
          - '--enable-paged-attn --cache-type=paged'
          - '--enable-paged-attn --cache-type=paged --enable-graph'
      stages:
        - name: test
          run: python InfiniLM/scripts/test_perf.py --verbose
75+
76+
# MetaX platform: image build inputs, container launch flags and test jobs.
# NOTE(review): nesting was reconstructed from a flattened view of this file —
# confirm that `timeout` belongs under `resources` and `env` at job level.
metax:
  image:
    dockerfile: images/metax/
    build_args:
      BASE_IMAGE: 'cr.metax-tech.com/public-ai-release/maca/vllm-metax:0.10.2-maca.ai3.2.1.7-torch2.6-py310-ubuntu22.04-amd64'
      APT_MIRROR: 'https://mirrors.tuna.tsinghua.edu.cn/ubuntu'
      PIP_INDEX_URL: 'https://pypi.org/simple'
      InfiniCore_BRANCH: main
  # Same privileged, host-namespace container as the other platforms, plus
  # unlimited locked memory and a 64 MiB (67108864-byte) stack limit.
  docker_args:
    - '--user=root'
    - '--network=host'
    - '--privileged'
    - '--cap-add=ALL'
    - '--pid=host'
    - '--ipc=host'
    - '--workdir=/workspace'
    - '--ulimit=memlock=-1'
    - '--ulimit=stack=67108864'
  # host_path:container_path bind mounts. `__WORKSPACE__` looks like a
  # placeholder filled in by the CI service (TODO confirm).
  volumes:
    - '/data:/data'
    - '/data-aisoft:/data-aisoft'
    - '/data-aisoft/artifacts/CI_metax_test/__WORKSPACE__:/artifacts'
  setup: 'pip install .[dev] --no-build-isolation'
  jobs:
    # Inference test. `ngpus` and TEST_PARAM both have three entries, so they
    # are presumably paired position by position; that would explain why the
    # last two TEST_PARAM entries are identical (TODO confirm).
    gpu_inferencetest:
      type: inferencetest
      resources:
        ngpus: [1, 1, 2]
        gpu_style: 'none'
        shm_size: 64g
        timeout: 3600
      env:
        MODEL_LIST: '9g_8b_thinking'
        TEST_PARAM:
          - '--attn=default'
          - '--attn=flash-attn --enable-graph --enable-paged-attn --cache-type=paged --block-size=256 --max-new-tokens=256'
          - '--attn=flash-attn --enable-graph --enable-paged-attn --cache-type=paged --block-size=256 --max-new-tokens=256'
      stages:
        - name: test
          run: >-
            python -u "InfiniLM/examples/test_infer.py"
            --device metax
            --model=/data-aisoft/mechdancer/models/9g_8b_thinking/
            --backend cpp
            --prompt "Hello"
            <TEST_PARAM>
113+
114+
# Moore Threads platform: image build inputs, container flags and test jobs.
# NOTE(review): nesting was reconstructed from a flattened view of this file —
# confirm that `timeout` belongs under `resources` and `env` at job level.
moore:
  image:
    dockerfile: images/moore/
    build_args:
      BASE_IMAGE: 'sh-harbor.mthreads.com/mcctest/vllm_musa:20251112_hygon'
      APT_MIRROR: 'https://mirrors.tuna.tsinghua.edu.cn/ubuntu'
      PIP_INDEX_URL: 'https://pypi.org/simple'
      InfiniCore_BRANCH: main
  # The container runs as root, privileged, and shares the host network,
  # PID and IPC namespaces.
  docker_args:
    - '--user=root'
    - '--network=host'
    - '--privileged'
    - '--cap-add=ALL'
    - '--pid=host'
    - '--ipc=host'
    - '--workdir=/workspace'
  # host_path:container_path bind mounts. `__WORKSPACE__` looks like a
  # placeholder filled in by the CI service (TODO confirm).
  volumes:
    - '/data:/data'
    - '/data-aisoft:/data-aisoft'
    - '/data-aisoft/artifacts/CI_moore_test/__WORKSPACE__:/artifacts'
  setup: 'pip install .[dev] --no-build-isolation'
  jobs:
    # Inference smoke test; `ngpus` lists two GPU counts (1 and 2).
    gpu_inferencetest:
      type: inferencetest
      resources:
        ngpus: [1, 2]
        gpu_style: 'none'
        shm_size: 64g
        timeout: 3600
      stages:
        - name: test
          run: >-
            python InfiniLM/examples/test_infer.py
            --device moore
            --model=/data-aisoft/mechdancer/models/9g_8b_thinking/

    # Benchmark; the batch size is part of each TEST_PARAM entry rather than
    # of the base command.
    gpu_benchtest:
      type: benchtest
      resources:
        gpu_style: 'none'
        shm_size: 64g
        timeout: 3600
      env:
        TEST_PARAM:
          - '--batch-size=1'
          - '--enable-paged-attn --batch-size=1'
          - '--enable-paged-attn --enable-graph --batch-size=8'
      stages:
        - name: test
          run: >-
            python InfiniLM/examples/bench.py
            --device moore
            --model=/data-aisoft/mechdancer/models/9g_8b_thinking/
            --input-len=256,1024
            --output-len=256,1024
            <TEST_PARAM>

    # Accuracy benchmarks. `--output_csv` moved from the base command into each
    # TEST_PARAM entry: it was fixed to 8b_mmlu_paged_results.csv, so the ceval
    # run wrote to a file named for mmlu and both runs shared one path. The
    # mmlu file name is unchanged.
    # NOTE(review): assumes one run per TEST_PARAM entry — confirm.
    gpu_accuracytest:
      type: accuracytest
      resources:
        gpu_style: 'none'
        shm_size: 64g
        timeout: 18000
      env:
        TEST_PARAM:
          - '--bench ceval --output_csv 8b_ceval_paged_results.csv'
          - '--bench mmlu --output_csv 8b_mmlu_paged_results.csv'
      stages:
        - name: test
          run: >-
            python InfiniLM/test/bench/test_benchmark.py
            --device moore
            --model /data-aisoft/zhushuang/models/9g_8b_thinking_llama/
            --subject all
            --backend cpp
            --tp 1
            --split val
            --enable-paged-attn
            <TEST_PARAM>

    # Service performance test. `run` has no `<TEST_PARAM>` placeholder, so the
    # env values are presumably read by the script or the CI service itself
    # (TODO confirm).
    gpu_servicetest:
      type: servicetest
      resources:
        gpu_style: 'none'
        shm_size: 64g
        timeout: 3600
      env:
        MODEL_LIST: '9g_8b_thinking'
        ENGINE: InfiniLM
        TEST_PARAM:
          - 'default'
          - '--num-blocks=1024 --block-size=256'
          - '--enable-graph --enable-paged-attn --num-blocks=1024 --block-size=256'
      stages:
        - name: test
          run: python InfiniLM/scripts/test_perf.py --verbose
181+
182+
# Cambricon platform: image build inputs, container launch flags and test jobs.
# NOTE(review): nesting was reconstructed from a flattened view of this file —
# confirm that `timeout` belongs under `resources` and `env` at job level.
cambricon:
  image:
    dockerfile: images/cambricon/
    build_args:
      # NOTE(review): no registry prefix, unlike the other platforms' base
      # images — presumably an image already present on the runner; confirm.
      BASE_IMAGE: 'pepe_working:latest'
      APT_MIRROR: 'https://mirrors.tuna.tsinghua.edu.cn/ubuntu'
      PIP_INDEX_URL: 'https://pypi.org/simple'
      InfiniCore_BRANCH: main
  # The container runs as root, privileged, and shares the host network,
  # PID and IPC namespaces.
  docker_args:
    - '--user=root'
    - '--network=host'
    - '--privileged'
    - '--cap-add=ALL'
    - '--pid=host'
    - '--ipc=host'
    - '--workdir=/workspace'
  # host_path:container_path bind mounts. `__WORKSPACE__` looks like a
  # placeholder filled in by the CI service (TODO confirm).
  volumes:
    - '/data:/data'
    - '/data-aisoft:/data-aisoft'
    - '/data-aisoft/artifacts/CI_cambricon_test/__WORKSPACE__:/artifacts'
  setup: 'pip install .[dev] --no-build-isolation'
  jobs:
    # Inference smoke test; no `ngpus` list is given for this platform.
    gpu_inferencetest:
      type: inferencetest
      resources:
        gpu_style: mlu
        shm_size: 64g
        timeout: 3600
      stages:
        - name: test
          run: >-
            python InfiniLM/examples/test_infer.py
            --device cambricon
            --model=/data-aisoft/mechdancer/models/9g_8b_thinking/

    # Benchmark with a single fixed configuration (no TEST_PARAM variants).
    gpu_benchtest:
      type: benchtest
      resources:
        gpu_style: mlu
        shm_size: 64g
        timeout: 3600
      stages:
        - name: test
          run: >-
            python InfiniLM/examples/bench.py
            --device cambricon
            --model=/data-aisoft/mechdancer/models/9g_8b_thinking/
            --input-len=256,1024
            --output-len=256,1024
            --batch-size=8

    # Accuracy benchmarks, each restricted to a single subject via TEST_PARAM.
    gpu_accuracytest:
      type: accuracytest
      resources:
        gpu_style: mlu
        shm_size: 64g
        timeout: 3600
      env:
        TEST_PARAM:
          - '--bench ceval --subject accountant'
          - '--bench mmlu --subject abstract_algebra'
      stages:
        - name: test
          run: >-
            python InfiniLM/test/bench/test_benchmark.py
            --device cambricon
            --model /data-aisoft/mechdancer/models/9g_8b_thinking/
            --backend cpp
            --max-new-tokens 5
            --cache-dir /data-aisoft/pepe/datasets/
            --split=val
            <TEST_PARAM>

    # Service performance test. `run` has no `<TEST_PARAM>` placeholder, so the
    # env values are presumably read by the script or the CI service itself
    # (TODO confirm).
    gpu_servicetest:
      type: servicetest
      resources:
        gpu_style: mlu
        shm_size: 64g
        timeout: 3600
      env:
        MODEL_LIST: '9g_8b_thinking'
        ENGINE: InfiniLM
        TEST_PARAM:
          - 'default'
      stages:
        - name: test
          run: python InfiniLM/scripts/test_perf.py --verbose
246+
247+
# ascend:
248+
# image:
249+
# dockerfile: images/ascend/
250+
# build_args:
251+
# BASE_IMAGE: quay.io/ascend/vllm-ascend:v0.18.0rc1-openeuler
252+
# PIP_INDEX_URL: https://pypi.org/simple
253+
# docker_args:
254+
# - "--runtime=runc"
255+
# - "--privileged"
256+
# - "--device=/dev/davinci0"
257+
# - "--device=/dev/davinci_manager"
258+
# - "--device=/dev/devmm_svm"
259+
# - "--device=/dev/hisi_hdc"
260+
# volumes:
261+
# - /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro
262+
# - /usr/local/dcmi:/usr/local/dcmi:ro
263+
# - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi:ro
264+
# - /data-aisoft/artifacts/CI_ascend_test/__WORKSPACE__:/artifacts
265+
# env:
266+
# ASCEND_HOME_PATH: /usr/local/Ascend/ascend-toolkit/latest
267+
# setup: pip install .[dev] --no-build-isolation
268+
# jobs:
269+
# npu:
270+
# type: unittest
271+
# resources:
272+
# ngpus: 1
273+
# gpu_style: none
274+
# memory: 32GB
275+
# shm_size: 16g
276+
# timeout: 3600
277+
# stages:
278+
# - name: test
279+
# run: pytest tests/ -n 1 --devices ascend -v --tb=short --junitxml=/workspace/results/test-results.xml
280+
281+
# Hygon platform: image build inputs, container launch flags and test jobs.
# NOTE(review): nesting was reconstructed from a flattened view of this file —
# confirm that `timeout` belongs under `resources` and `env` at job level.
hygon:
  image:
    dockerfile: images/hygon/
    build_args:
      BASE_IMAGE: 'image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04.1-py3.10'
      APT_MIRROR: 'https://mirrors.tuna.tsinghua.edu.cn/ubuntu'
      PIP_INDEX_URL: 'https://pypi.org/simple'
      InfiniCore_BRANCH: main
  # Privileged, host-namespace container that also passes through the
  # /dev/kfd, /dev/mkfd and /dev/dri device nodes, disables the seccomp
  # profile and joins the `video` and `render` groups.
  docker_args:
    - '--user=root'
    - '--network=host'
    - '--privileged'
    - '--cap-add=ALL'
    - '--pid=host'
    - '--ipc=host'
    - '--workdir=/workspace'
    - '--device=/dev/kfd'
    - '--device=/dev/mkfd'
    - '--device=/dev/dri'
    # `--security-opt` and its value are two separate list entries.
    - '--security-opt'
    - 'seccomp=unconfined'
    - '--group-add=video'
    - '--group-add=render'
  # host_path:container_path bind mounts. `__WORKSPACE__` looks like a
  # placeholder filled in by the CI service (TODO confirm).
  volumes:
    - '/data:/data'
    - '/data-aisoft:/data-aisoft'
    - '/data-aisoft/artifacts/CI_hygon_test/__WORKSPACE__:/artifacts'
    - '/opt/hyhal:/opt/hyhal'
  setup: 'pip install .[dev] --no-build-isolation'
  jobs:
    # Inference smoke test; `ngpus` lists two GPU counts (1 and 2).
    gpu_inferencetest:
      type: inferencetest
      resources:
        ngpus: [1, 2]
        gpu_style: 'none'
        shm_size: 64g
        timeout: 3600
      stages:
        - name: test
          run: >-
            python InfiniLM/examples/test_infer.py
            --device hygon
            --model=/data-aisoft/mechdancer/models/9g_8b_thinking/

    # Benchmark; only the `default` TEST_PARAM entry is configured.
    gpu_benchtest:
      type: benchtest
      resources:
        gpu_style: 'none'
        shm_size: 64g
        timeout: 3600
      env:
        TEST_PARAM:
          - 'default'
      stages:
        - name: test
          run: >-
            python InfiniLM/examples/bench.py
            --device hygon
            --model=/data-aisoft/mechdancer/models/9g_8b_thinking/
            --input-len=256,1024
            --output-len=256,1024
            --batch-size=8
            <TEST_PARAM>

    # Accuracy benchmarks; the benchmark name comes from TEST_PARAM.
    gpu_accuracytest:
      type: accuracytest
      resources:
        gpu_style: 'none'
        shm_size: 64g
        timeout: 10800
      env:
        TEST_PARAM:
          - '--bench mmlu'
          - '--bench ceval'
      stages:
        - name: test
          run: >-
            python InfiniLM/test/bench/test_benchmark.py
            --device hygon
            --model /data-aisoft/mechdancer/models/9g_8b_thinking/
            --backend cpp
            --max-new-tokens 5
            --cache-dir /data-aisoft/pepe/datasets/
            --split=val
            <TEST_PARAM>

    # Service performance test. `run` has no `<TEST_PARAM>` placeholder, so the
    # env values are presumably read by the script or the CI service itself
    # (TODO confirm).
    gpu_servicetest:
      type: servicetest
      resources:
        gpu_style: 'none'
        shm_size: 64g
        timeout: 3600
      env:
        MODEL_LIST: '9g_8b_thinking'
        ENGINE: InfiniLM
        TEST_PARAM:
          - 'default'
      stages:
        - name: test
          run: python InfiniLM/scripts/test_perf.py --verbose

.github/workflows/ci_test.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Entry-point workflow: delegates all CI work to a reusable workflow.
name: CI

on:
  pull_request:
    branches:
      - main
    # Run automatically only when a PR is opened or reopened; commits pushed
    # to an already-open PR (`synchronize`) do not trigger a run.
    types:
      - opened
      - reopened
  # Allows a manual re-run from the Actions page.
  workflow_dispatch:

jobs:
  ci:
    # NOTE(review): the reusable workflow is referenced by branch name and
    # receives all secrets — consider pinning it to a commit SHA.
    uses: InfiniTensor/ci/.github/workflows/infinilm-ci.yml@infiniCore_ci
    with:
      config_path: .github/ci_config.yaml
      ci_ref: infiniCore_ci
    secrets: inherit

0 commit comments

Comments
 (0)