Unit Tests #2148

Workflow file for this run

.github/workflows/unit_tests.yml at cde1604

	# Display name of this workflow.
	name: 'Unit Tests'

	# Per-run title, taken from the optional `title` dispatch input.
	run-name: '${{ github.event.inputs.title }}'

	# Default shell for every `run:` step: bash as a login shell (-l) that
	# aborts on the first failing command (-e).
	defaults:
	  run:
	    shell: bash -le {0}

	# Triggers: an external repository_dispatch event, or a manual
	# workflow_dispatch run with the inputs below. No input is marked required.
	on:
	  repository_dispatch:
	  workflow_dispatch:
	    inputs:
	      title:
	        description: 'set a title for this run'
	        required: false
	        default: ''
	      # Code under test; empty values fall back to the current repository/ref.
	      repo:
	        description: 'GitHub repo {owner}/{repo}'
	        required: false
	        default: ''
	      ref:
	        description: 'GitHub ref: Branch, Tag or Commit SHA'
	        required: false
	        default: ''
	      pr_number:
	        description: 'PR Number'
	        required: false
	        type: number
	      test_regex:
	        description: 'Regex to filter test files'
	        required: false
	        default: ''
	      max-parallel:
	        description: 'Parallel jobs'
	        required: false
	        default: '4'
	      model-test-mode:
	        description: 'Model compat test mode'
	        type: choice
	        required: false
	        default: 'fast'
	        options:
	          - 'fast'
	          - 'slow'
	      # Each option is a JSON-encoded runner label list kept as a string.
	      server:
	        description: 'Wheel Build Server'
	        type: choice
	        default: '["self-hosted", "xeon5"]'
	        options:
	          - '["self-hosted", "xeon5"]'
	          - '["self-hosted", "zen4"]'

	# Workflow-wide environment. Number-looking values are quoted so the YAML
	# loader can never retype them (e.g. a future 3.10 would otherwise become 3.1).
	env:
	  CUDA_DEVICE_ORDER: PCI_BUS_ID
	  CUDA_VISIBLE_DEVICES: '0'
	  # Force fresh torch.ops JIT builds in CI so heterogeneous GPUs never reuse
	  # cached kernels compiled by earlier jobs/tests.
	  GPTQMODEL_KERNEL_REBUILD: '1'
	  # CI can allocate A100-class devices (sm_80), so the shared JIT arch list
	  # must include 8.0 in addition to the newer targets.
	  TORCH_CUDA_ARCH_LIST: '8.0;8.6;8.9;9.0;12.0'
	  PYTORCH_ALLOC_CONF: 'expandable_segments:True'
	  RUNNER: 10.0.13.31
	  XEON5: 10.0.14.249
	  GPU_ALLOCATOR_URL: http://10.0.13.31/gpu
	  LOGBAR_ANIMATION: '0'
	  CUDA_VERSION: '130'
	  UV_TORCH_BACKEND: cu130
	  TORCH_VERSION: '2.11.0'
	  # vllm doesn't support 3.14
	  PYTHON_VERSION: '3.13'
	  UV_PYTHON: '3.13'
	  # PYTHON_GIL: 0  (disabled: test libs don't support it yet)
	  IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral.py,models/test_phi_3_moe.py,test_bits_new.py,models/test_internlm.py,models/test_internlm2_5.py,models/test_xverse.py"
	  MODEL_TEST_MODE: ${{ github.event.inputs['model-test-mode'] || 'fast' }}
	  MODEL_TEST_GPU_COUNT: '1'
	  DEFUSER_GIT_URL: git+https://github.com/modelcloud/Defuser.git
	  PYPCRE_GIT_URL: git+https://github.com/modelcloud/PyPcre.git
	  TOKENICER_GIT_URL: git+https://github.com/modelcloud/Tokenicer.git
	  LOGBAR_GIT_URL: git+https://github.com/modelcloud/LogBar.git
	  EVALUTION_GIT_URL: git+https://github.com/modelcloud/Evalution.git
	  # Dispatch inputs win; otherwise use the repository/ref that triggered the run.
	  repo: ${{ github.event.inputs.repo || github.repository }}
	  ref: ${{ github.event.inputs.ref || github.ref }}
	  HF_TOKEN: ${{ secrets.HF_TOKEN }}

	# One active run per ref: starting a new run for the same ref cancels the
	# one still in progress.
	concurrency:
	  # Disabled group suffix: -${{ github.event.inputs.test_names }}
	  group: ${{ github.event.inputs.ref || github.ref }}-workflow-unit-tests
	  cancel-in-progress: true

	# The workflow token is limited to read-only access to repository contents.
	permissions:
	  contents: read

	# Job graph: check-vm and list-test-files feed the cpu/torch/models test
	# jobs; check-torch and setuptools-compatibility declare no `needs`.
	#
	# Dispatch inputs are handed to shell steps through step-level `env:`
	# variables instead of inline ${{ }} expansion, so their contents are never
	# parsed as shell code.
	jobs:
	  # Publishes runner details as job outputs for the test jobs.
	  check-vm:
	    runs-on: ubuntu-latest
	    outputs:
	      ip: ${{ steps.get_ip.outputs.ip }}
	      run_id: ${{ steps.get_ip.outputs.run_id }}
	      max-parallel: ${{ steps.get_ip.outputs['max-parallel'] }}
	      cuda_version: ${{ env.CUDA_VERSION }}
	    steps:
	      - name: Checkout Codes
	        uses: actions/checkout@v6
	        with:
	          repository: ${{ env.repo }}
	          ref: ${{ env.ref }}

	      - name: Print env
	        env:
	          CI_REPO: ${{ env.repo }}
	          CI_REF: ${{ env.ref }}
	          SELECTED_SERVER: ${{ github.event.inputs.server }}
	        run: |
	          echo "repo: $CI_REPO"
	          echo "ref: $CI_REF"
	          echo "selected server: $SELECTED_SERVER"

	      # Presumably the script writes the ip, run_id and max-parallel step
	      # outputs that this job re-exports above; verify against the script.
	      - name: Set server
	        id: get_ip
	        env:
	          MAX_PARALLEL_INPUT: ${{ github.event.inputs['max-parallel'] }}
	        run: |
	          bash .github/scripts/ci_write_runner_outputs.sh \
	            "$RUNNER" \
	            "${{ github.run_id }}" \
	            "$MAX_PARALLEL_INPUT"

	  # Builds the test plan and exposes its file lists and matrices as outputs.
	  list-test-files:
	    runs-on: ubuntu-latest
	    outputs:
	      cpu-files: ${{ steps.files.outputs.cpu-files }}
	      torch-files: ${{ steps.files.outputs.torch-files }}
	      model-files: ${{ steps.files.outputs.model-files }}
	      m4-files: ${{ steps.files.outputs.m4-files }}
	      cpu-matrix: ${{ steps.files.outputs.cpu-matrix }}
	      torch-matrix: ${{ steps.files.outputs.torch-matrix }}
	      model-matrix: ${{ steps.files.outputs.model-matrix }}

	    steps:
	      - name: Checkout Codes
	        uses: actions/checkout@v6
	        with:
	          repository: ${{ env.repo }}
	          ref: ${{ env.ref }}

	      - name: Prepare checkout
	        env:
	          INPUT_PR_NUMBER: ${{ github.event.inputs.pr_number }}
	        run: |
	          .github/scripts/ci_prepare_checkout.sh "$INPUT_PR_NUMBER"

	      - name: List files
	        id: files
	        env:
	          TEST_REGEX_INPUT: ${{ github.event.inputs.test_regex }}
	        run: |
	          test_plan=$(python3 .github/scripts/ci_workflow.py list-tests \
	            --ignored-test-files "$IGNORED_TEST_FILES" \
	            --test-regex "$TEST_REGEX_INPUT")
	          echo "Test plan: $test_plan"

	          # First pass: emit key=value lines into $GITHUB_OUTPUT.
	          # Note the plan's `mlx_files` entry is published as `m4-files`.
	          TEST_PLAN="$test_plan" python3 - <<'PY' >> "$GITHUB_OUTPUT"
	          import json
	          import os

	          plan = json.loads(os.environ["TEST_PLAN"])
	          print(f"cpu-files={json.dumps(plan['cpu_files'])}")
	          print(f"torch-files={json.dumps(plan['torch_files'])}")
	          print(f"model-files={json.dumps(plan['model_files'])}")
	          print(f"m4-files={json.dumps(plan['mlx_files'])}")
	          print(f"cpu-matrix={json.dumps(plan['cpu_matrix'])}")
	          print(f"torch-matrix={json.dumps(plan['torch_matrix'])}")
	          print(f"model-matrix={json.dumps(plan['model_matrix'])}")
	          PY

	          # Second pass: print the same plan to the job log.
	          TEST_PLAN="$test_plan" python3 - <<'PY'
	          import json
	          import os

	          plan = json.loads(os.environ["TEST_PLAN"])
	          print(f"CPU Test files: {json.dumps(plan['cpu_files'])}")
	          print(f"Torch Test files: {json.dumps(plan['torch_files'])}")
	          print(f"Model Compat Test files: {json.dumps(plan['model_files'])}")
	          print(f"MLX Test files: {json.dumps(plan['mlx_files'])}")
	          print(f"CPU Matrix: {json.dumps(plan['cpu_matrix'])}")
	          print(f"Torch Matrix: {json.dumps(plan['torch_matrix'])}")
	          print(f"Model Matrix: {json.dumps(plan['model_matrix'])}")
	          PY
	          echo "Ignored Test files: $IGNORED_TEST_FILES"

	  # cpu, torch and models call the same reusable workflow and differ only in
	  # which matrix they pass. Inputs are repeated from github.event.inputs here
	  # rather than read from the workflow-level env.
	  # NOTE(review): an empty output (e.g. list-test-files failed) also passes
	  # the `!= '[]'` gate, and the job then receives matrix_json '[]' - confirm
	  # this is intended.
	  cpu:
	    needs:
	      - list-test-files
	      - check-vm
	    if: always() && !cancelled() && needs.list-test-files.outputs.cpu-matrix != '[]'
	    uses: ./.github/workflows/unit_tests_reusable.yml
	    secrets: inherit
	    with:
	      repo: ${{ github.event.inputs.repo || github.repository }}
	      ref: ${{ github.event.inputs.ref || github.ref }}
	      pr_number: ${{ fromJSON(github.event.inputs.pr_number || '0') }}
	      check_vm_ip: ${{ needs.check-vm.outputs.ip }}
	      check_vm_max_parallel: ${{ needs.check-vm.outputs['max-parallel'] }}
	      check_vm_cuda_version: ${{ needs.check-vm.outputs.cuda_version }}
	      matrix_json: ${{ needs.list-test-files.outputs.cpu-matrix || '[]' }}
	      model_test_mode: ${{ github.event.inputs['model-test-mode'] || 'fast' }}

	  torch:
	    needs:
	      - list-test-files
	      - check-vm
	    if: always() && !cancelled() && needs.list-test-files.outputs.torch-matrix != '[]'
	    uses: ./.github/workflows/unit_tests_reusable.yml
	    secrets: inherit
	    with:
	      repo: ${{ github.event.inputs.repo || github.repository }}
	      ref: ${{ github.event.inputs.ref || github.ref }}
	      pr_number: ${{ fromJSON(github.event.inputs.pr_number || '0') }}
	      check_vm_ip: ${{ needs.check-vm.outputs.ip }}
	      check_vm_max_parallel: ${{ needs.check-vm.outputs['max-parallel'] }}
	      check_vm_cuda_version: ${{ needs.check-vm.outputs.cuda_version }}
	      matrix_json: ${{ needs.list-test-files.outputs.torch-matrix || '[]' }}
	      model_test_mode: ${{ github.event.inputs['model-test-mode'] || 'fast' }}

	  models:
	    needs:
	      - list-test-files
	      - check-vm
	    if: always() && !cancelled() && needs.list-test-files.outputs.model-matrix != '[]'
	    uses: ./.github/workflows/unit_tests_reusable.yml
	    secrets: inherit
	    with:
	      repo: ${{ github.event.inputs.repo || github.repository }}
	      ref: ${{ github.event.inputs.ref || github.ref }}
	      pr_number: ${{ fromJSON(github.event.inputs.pr_number || '0') }}
	      check_vm_ip: ${{ needs.check-vm.outputs.ip }}
	      check_vm_max_parallel: ${{ needs.check-vm.outputs['max-parallel'] }}
	      check_vm_cuda_version: ${{ needs.check-vm.outputs.cuda_version }}
	      matrix_json: ${{ needs.list-test-files.outputs.model-matrix || '[]' }}
	      model_test_mode: ${{ github.event.inputs['model-test-mode'] || 'fast' }}

	  # Install smoke test: each matrix entry creates a fresh uv venv and runs
	  # one install command (PyPI or local checkout, via pip or uv).
	  # NOTE(review): within this file the `server` dispatch input is only
	  # echoed by check-vm; this job always targets xeon5 - confirm intended.
	  check-torch:
	    runs-on: [self-hosted, xeon5]
	    container:
	      image: '10.0.13.31:5000/nvidia/cuda:130-ubuntu24.04_0415'
	      options: --device /dev/dri --ipc=host --runtime=nvidia --gpus all
	      volumes:
	        - /monster/ci/env/entrypoint.sh:/entrypoint.sh
	        - /monster/ci/env/entrypoint.sh:/etc/profile.d/01-entrypoint.sh
	        - /dev/dri/by-path:/dev/dri/by-path
	        - /monster/ci/uv:/opt/uv
	        - /monster/ci/env:/opt/env
	    strategy:
	      fail-fast: false
	      matrix:
	        include:
	          # env_name embeds run id and attempt, so each run/attempt gets
	          # its own venv directory name.
	          - name: Test pypi pip
	            env_name: pypi_pip_env_${{ github.run_id }}_${{ github.run_attempt }}
	            use_pip: 'true'
	            install_cmd: pip install gptqmodel torch -U
	          - name: Test pypi uv
	            env_name: pypi_uv_env_${{ github.run_id }}_${{ github.run_attempt }}
	            use_pip: 'false'
	            install_cmd: uv pip install gptqmodel torch -U
	          - name: Test local pip
	            env_name: local_pip_env_${{ github.run_id }}_${{ github.run_attempt }}
	            use_pip: 'true'
	            install_cmd: pip install . torch -U
	          - name: Test local uv
	            env_name: local_uv_env_${{ github.run_id }}_${{ github.run_attempt }}
	            use_pip: 'false'
	            install_cmd: uv pip install . torch -U
	    steps:
	      - name: Checkout Codes
	        uses: actions/checkout@v6
	        with:
	          repository: ${{ env.repo }}
	          ref: ${{ env.ref }}

	      - name: Prepare checkout
	        env:
	          INPUT_PR_NUMBER: ${{ github.event.inputs.pr_number }}
	        run: |
	          .github/scripts/ci_prepare_checkout.sh "$INPUT_PR_NUMBER"

	      # Matrix values are static strings defined above, so inline expansion
	      # is kept here; install_cmd must be expanded as a command line.
	      - name: ${{ matrix.name }}
	        run: |
	          uv venv "${{ matrix.env_name }}"
	          source "${{ matrix.env_name }}/bin/activate"
	          if [[ "${{ matrix.use_pip }}" == "true" ]]; then
	            uv pip install pip -U
	          fi
	          ${{ matrix.install_cmd }}

	  # Delegates to a separate reusable workflow; no secrets are passed.
	  setuptools-compatibility:
	    uses: ./.github/workflows/setuptools_compatibility_reusable.yml
	    with:
	      repo: ${{ github.event.inputs.repo || github.repository }}
	      ref: ${{ github.event.inputs.ref || github.ref }}
	      pr_number: ${{ fromJSON(github.event.inputs.pr_number || '0') }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Unit Tests #2148

Workflow file

Unit Tests #2148

Uh oh!

Workflow file for this run