PR-B2 (ADR 0008 Phase B): AppendTokens + byte-exact prefill-incremental contract #72

Workflow file for this run

	name: CI

	# Triggers: every push to main, every PR targeting main, and manual
	# dispatch from the Actions tab.
	#
	# Scope: platform-neutral unit tests with 100% line coverage on the
	# library modules we actually ship for this commit. We deliberately
	# DO NOT run:
	#
	#   * tests/core/ — needs HuggingFace weights
	#   * tests/system/ — same, plus is slow
	#   * tests/inference_engine/proposer/ — uses real Qwen3 sparse
	#     proposer; HF-cache-bound
	#   * tests/backends/mlx/test_{verifier,proposer,cache,torch_bridge}.py
	#     — Apple-Silicon only
	#
	# Mac and CUDA contributors run the full suite locally via
	# scripts/run_platform_tests.sh and push the platform-test reports to
	# the PR branch as evidence; this CI workflow guards the platform-
	# neutral surface so a regression there cannot land on main.

	on:
	  push:
	    branches: [main]
	  pull_request:
	    branches: [main]
	  workflow_dispatch: {}

	# Least privilege: the jobs in this workflow only read the repository
	# (checkout, test, build, lint); none of them pushes or comments.
	permissions:
	  contents: read

	# Cancel superseded runs on the same branch — saves CI time on
	# rapid-fire pushes.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref }}
	  cancel-in-progress: true

	jobs:
	  # Runs the platform-neutral pytest selection and fails the build if
	  # line coverage on the listed packages drops below 100%.
	  unit-tests:
	    name: unit tests + 100% coverage
	    runs-on: ubuntu-latest
	    strategy:
	      fail-fast: false
	      matrix:
	        python-version: ["3.12"]
	    steps:
	      - name: Check out
	        uses: actions/checkout@v4

	      - name: Set up Python ${{ matrix.python-version }}
	        uses: actions/setup-python@v5
	        with:
	          python-version: ${{ matrix.python-version }}
	          cache: pip

	      - name: Install dependencies
	        run: |
	          python -m pip install --upgrade pip
	          pip install -r requirements.txt

	      # Prints the resolved versions of the key dependencies so a
	      # failing run can be correlated with a dependency bump.
	      - name: Show installed key versions
	        run: |
	          python -c "import torch, fastapi, pydantic, prometheus_client, transformers; \
	          print('torch', torch.__version__); \
	          print('fastapi', fastapi.__version__); \
	          print('pydantic', pydantic.VERSION); \
	          print('transformers', transformers.__version__); \
	          print('prometheus_client', __import__('importlib.metadata', fromlist=['version']).version('prometheus_client'))"

	      - name: Run platform-neutral test suite with 100% coverage
	        env:
	          PYTHONPATH: .
	        run: |
	          pytest \
	            tests/inference_engine/server/ \
	            tests/inference_engine/memory/ \
	            tests/inference_engine/scheduler/ \
	            tests/inference_engine/pipeline/ \
	            tests/inference_engine/session/ \
	            tests/training/repr_align/ \
	            tests/backends/mlx/test_env.py \
	            --cov=inference_engine.server \
	            --cov=inference_engine.memory \
	            --cov=inference_engine.scheduler \
	            --cov=inference_engine.pipeline \
	            --cov=inference_engine.session \
	            --cov=training.repr_align \
	            --cov-report=term \
	            --cov-report=xml:coverage.xml \
	            --cov-fail-under=100 \
	            --junitxml=junit.xml \
	            -v

	      # `if: always()` keeps the reports available even when the test
	      # step above failed.
	      - name: Upload coverage artifact
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: coverage-py${{ matrix.python-version }}
	          path: |
	            coverage.xml
	            junit.xml
	          if-no-files-found: warn
	          retention-days: 14

	  # Imports every shipping subpackage once, so a broken import fails
	  # CI even where no unit test touches the module.
	  package-import-smoke:
	    name: package import smoke
	    runs-on: ubuntu-latest
	    steps:
	      - name: Check out
	        uses: actions/checkout@v4

	      - name: Set up Python 3.12
	        uses: actions/setup-python@v5
	        with:
	          python-version: "3.12"
	          cache: pip

	      - name: Install dependencies
	        run: |
	          python -m pip install --upgrade pip
	          pip install -r requirements.txt

	      - name: Import every shipping subpackage
	        env:
	          PYTHONPATH: .
	        run: |
	          python -c "import inference_engine; \
	          import inference_engine.server; \
	          import inference_engine.server.app; \
	          import inference_engine.server.config; \
	          import inference_engine.server.engine; \
	          import inference_engine.server.metrics; \
	          import inference_engine.server.errors; \
	          import inference_engine.server.auth; \
	          import inference_engine.server.tokenizer; \
	          import inference_engine.server.streaming; \
	          import inference_engine.server.schemas; \
	          import inference_engine.memory; \
	          import inference_engine.memory.slab; \
	          import inference_engine.memory.pool; \
	          import inference_engine.scheduler; \
	          import inference_engine.scheduler.config; \
	          import inference_engine.scheduler.scheduler; \
	          import inference_engine.scheduler.session; \
	          import inference_engine.pipeline; \
	          import inference_engine.pipeline.coordinator; \
	          import inference_engine.session; \
	          import inference_engine.session.store; \
	          import inference_engine.server.grpc_app; \
	          import inference_engine.server.proto_gen.kakeya.v1.runtime_pb2; \
	          import inference_engine.server.proto_gen.kakeya.v1.runtime_pb2_grpc; \
	          import inference_engine.proposer; \
	          import inference_engine.proposer.sparse_logits; \
	          import inference_engine.backends.mlx.env; \
	          import training.repr_align; \
	          print('all imports succeeded')"

	  # Builds the Docker image locally (no push) and checks that the
	  # package imports and the CLI parses inside it.
	  docker-build:
	    name: docker build + import smoke
	    runs-on: ubuntu-latest
	    steps:
	      - name: Check out
	        uses: actions/checkout@v4

	      - name: Set up Docker Buildx
	        uses: docker/setup-buildx-action@v3

	      # `load: true` makes the built image available to the
	      # `docker run` commands in the next step.
	      - name: Build image
	        uses: docker/build-push-action@v6
	        with:
	          context: .
	          file: Dockerfile
	          tags: kakeya:ci-${{ github.sha }}
	          push: false
	          load: true
	          cache-from: type=gha
	          cache-to: type=gha,mode=max

	      - name: Smoke-test image (import only — weights would require HF cache)
	        run: |
	          # Confirm the package imports inside the image without
	          # touching HF cache.
	          docker run --rm \
	            --entrypoint python \
	            kakeya:ci-${{ github.sha }} \
	            -c "import inference_engine.server.app; print('image imports clean')"
	          # Confirm the server CLI parses (--help exits 0 before any
	          # model load, so we don't need HF cache).
	          docker run --rm \
	            kakeya:ci-${{ github.sha }} \
	            --help

	  proto-lint:
	    name: proto lint (buf)
	    runs-on: ubuntu-latest
	    # ADR 0008 PR-A1: the proto/ schema is the wire-contract source of
	    # truth for the gRPC runtime + Python / TypeScript SDKs. `buf lint`
	    # under the STANDARD ruleset enforces public-style conventions
	    # (service / message naming, enum zero-value naming, file-package
	    # alignment). `buf format --diff` enforces canonical formatting so
	    # the source of truth is also character-stable across editors.
	    steps:
	      - name: Check out
	        uses: actions/checkout@v4

	      - name: Install buf
	        uses: bufbuild/buf-setup-action@v1
	        with:
	          version: "1.50.0"
	          github_token: ${{ secrets.GITHUB_TOKEN }}

	      - name: Lint .proto files
	        run: buf lint

	      - name: Verify .proto formatting is canonical
	        run: |
	          # `buf format` is an in-place rewriter; --diff makes it print
	          # what it would change instead of editing files (and, without
	          # --exit-code, exits 0 even when a diff is printed). A
	          # non-empty diff is a CI failure.
	          diff_output="$(buf format --diff)"
	          if [ -n "$diff_output" ]; then
	            echo "::error::buf format would have rewritten one or more .proto files."
	            echo "Run 'buf format -w' locally and commit the result."
	            printf '%s\n' "$diff_output"
	            exit 1
	          fi

	  proto-stub-drift:
	    name: proto stub drift
	    runs-on: ubuntu-latest
	    # ADR 0008 PR-B1: the committed Python stubs under
	    # inference_engine/server/proto_gen/ MUST be byte-identical to what
	    # scripts/regenerate_proto_stubs.sh produces from the .proto. This
	    # job re-runs the script, then `git diff --exit-code` fails CI if
	    # anything changed — including stubs the script newly generates
	    # that were never committed. Catches drift between the .proto
	    # contract and the SDK-consumed Python API.
	    steps:
	      - name: Check out
	        uses: actions/checkout@v4

	      - name: Set up Python 3.12
	        uses: actions/setup-python@v5
	        with:
	          python-version: "3.12"
	          cache: pip

	      - name: Install grpcio-tools
	        run: |
	          python -m pip install --upgrade pip
	          # Pin the same grpcio-tools range as requirements.txt so the
	          # stubs we regenerate match what production uses. If
	          # grpcio-tools updates and starts producing different stub
	          # bytes, this job catches it as a drift before merge.
	          pip install 'grpcio>=1.65,<2.0' 'grpcio-tools>=1.65,<2.0'

	      - name: Regenerate stubs
	        run: bash scripts/regenerate_proto_stubs.sh

	      - name: Fail if regenerated stubs differ from committed stubs
	        run: |
	          # `git diff` ignores untracked files, so a stub that the
	          # script newly generates but nobody committed would go
	          # unnoticed. Register untracked files as intent-to-add first
	          # so they show up in the diff as well.
	          git add --intent-to-add -- inference_engine/server/proto_gen/
	          if ! git diff --exit-code -- inference_engine/server/proto_gen/; then
	            echo "::error::Committed stubs are out of date with proto/."
	            echo "Run scripts/regenerate_proto_stubs.sh locally and commit."
	            exit 1
	          fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

PR-B2 (ADR 0008 Phase B): AppendTokens + byte-exact prefill-incremental contract #72

Workflow file

PR-B2 (ADR 0008 Phase B): AppendTokens + byte-exact prefill-incremental contract #72

Uh oh!

Workflow file for this run