tests : add support for qwen3 SSM archs (#24031) #72

Workflow file for this run

.github/workflows/server-self-hosted.yml at 06938ac

	name: Server (self-hosted)

	on:
	workflow_dispatch: # allows manual triggering
	inputs:
	sha:
	description: 'Commit SHA1 to build'
	required: false
	type: string
	slow_tests:
	description: 'Run slow tests'
	required: true
	type: boolean
	push:
	branches:
	- master
	paths: [
	'.github/workflows/server-self-hosted.yml',
	'**/CMakeLists.txt',
	'**/Makefile',
	'*/.h',
	'*/.hpp',
	'*/.c',
	'*/.cpp',
	'*/.cu',
	'*/.swift',
	'*/.m',
	'tools/server/*.'
	]

	env:
	LLAMA_ARG_LOG_COLORS: 1
	LLAMA_ARG_LOG_PREFIX: 1
	LLAMA_ARG_LOG_TIMESTAMPS: 1
	LLAMA_ARG_LOG_VERBOSITY: 10

	concurrency:
	group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref \|\| github.run_id }}
	cancel-in-progress: true

	jobs:
	server-metal:
	runs-on: [self-hosted, llama-server, macOS, ARM64]

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	ref: ${{ github.event.inputs.sha \|\| github.event.pull_request.head.sha \|\| github.sha \|\| github.head_ref \|\| github.ref_name }}

	- name: Build
	id: cmake_build
	run: \|
	cmake -B build -DGGML_SCHED_NO_REALLOC=ON
	cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) --target llama-server

	- name: Python setup
	id: setup_python
	run: \|
	cd tools/server/tests
	python3 -m venv venv
	source venv/bin/activate
	pip install -r requirements.txt

	- name: Tests (GPUx1)
	id: server_integration_tests
	if: ${{ !github.event.pull_request }}
	run: \|
	cd tools/server/tests
	source venv/bin/activate
	pytest -v -x -m "not slow"

	- name: Tests (GPUx1, backend-sampling)
	id: server_integration_tests_backend_sampling
	if: ${{ !github.event.pull_request }}
	run: \|
	cd tools/server/tests
	source venv/bin/activate
	export LLAMA_ARG_BACKEND_SAMPLING=1
	pytest -v -x -m "not slow"

	- name: Tests (GPUx2)
	id: server_integration_tests_gpu2
	if: ${{ !github.event.pull_request }}
	run: \|
	cd tools/server/tests
	source venv/bin/activate
	export GGML_METAL_DEVICES=2
	pytest -v -x -m "not slow"

	- name: Tests (GPUx2, backend-sampling)
	id: server_integration_tests_gpu2_backend_sampling
	if: ${{ !github.event.pull_request }}
	run: \|
	cd tools/server/tests
	source venv/bin/activate
	export GGML_METAL_DEVICES=2 LLAMA_ARG_BACKEND_SAMPLING=1
	pytest -v -x -m "not slow"

	server-cuda:
	runs-on: [self-hosted, llama-server, Linux, NVIDIA]

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	ref: ${{ github.event.inputs.sha \|\| github.event.pull_request.head.sha \|\| github.sha \|\| github.head_ref \|\| github.ref_name }}

	- name: Build
	id: cmake_build
	run: \|
	cmake -B build -DGGML_CUDA=ON -DGGML_SCHED_NO_REALLOC=ON
	cmake --build build --config Release -j $(nproc) --target llama-server

	- name: Python setup
	id: setup_python
	run: \|
	cd tools/server/tests
	python3 -m venv venv
	source venv/bin/activate
	pip install -r requirements.txt

	- name: Tests (GPUx1)
	id: server_integration_tests
	if: ${{ !github.event.pull_request }}
	run: \|
	cd tools/server/tests
	source venv/bin/activate
	pytest -v -x -m "not slow"

	- name: Tests (GPUx1, backend-sampling)
	id: server_integration_tests_backend_sampling
	if: ${{ !github.event.pull_request }}
	run: \|
	cd tools/server/tests
	source venv/bin/activate
	export LLAMA_ARG_BACKEND_SAMPLING=1
	pytest -v -x -m "not slow"

	server-kleidiai:
	runs-on: ah-ubuntu_22_04-c8g_8x

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v6
	with:
	fetch-depth: 0
	ref: ${{ github.event.inputs.sha \|\| github.event.pull_request.head.sha \|\| github.sha \|\| github.head_ref \|\| github.ref_name }}

	- name: Dependencies
	id: depends
	run: \|
	set -euxo pipefail
	sudo apt-get update
	sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a \
	apt-get install -y \
	build-essential \
	libssl-dev \
	python3-venv \
	gpg \
	wget \
	time \
	git-lfs

	git lfs install

	# install the latest cmake
	sudo install -d /usr/share/keyrings
	wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc \
	\| gpg --dearmor \
	\| sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
	echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' \
	\| sudo tee /etc/apt/sources.list.d/kitware.list
	sudo apt-get update
	sudo apt-get install -y cmake

	- name: Build
	id: cmake_build
	run: \|
	cmake -B build -DGGML_SCHED_NO_REALLOC=ON -DGGML_CPU_KLEIDIAI=ON
	cmake --build build --config Release -j $(nproc) --target llama-server

	- name: Python setup
	id: setup_python
	run: \|
	cd tools/server/tests
	python3 -m venv venv
	source venv/bin/activate
	pip install -r requirements.txt

	- name: Tests
	id: server_integration_tests
	if: ${{ !github.event.pull_request }}
	run: \|
	cd tools/server/tests
	source venv/bin/activate
	pytest -v -x -m "not slow"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

tests : add support for qwen3 SSM archs (#24031) #72

Workflow file

tests : add support for qwen3 SSM archs (#24031) #72

Uh oh!

Workflow file for this run