Skip to content

Commit e8e8415

Browse files
authored
Merge pull request #56 from FluffyAIcode/AgentMemory/v030-pr-n4-sdk-conftest-stub-cleanup-8e7f
PR-N4: remove SDK conftest stub + finalize no-doubles cleanup
2 parents 36e5dab + f44cc80 commit e8e8415

7 files changed

Lines changed: 361 additions & 836 deletions

File tree

.github/workflows/ci.yaml

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,22 +72,21 @@ jobs:
7272
# PYTHONPATH route avoids a setuptools build step in CI.
7373
PYTHONPATH: .:sdks/python
7474
run: |
75-
# PR-N1/N2/N3 (ADR 0008) cleanup: this gate covers ONLY
75+
# PR-N1/N2/N3/N4 (ADR 0008) cleanup: this gate covers ONLY
7676
# verifier-independent code. The Linux runner cannot load
7777
# real Qwen3 weights; the cleanup PRs retired the
7878
# FakeVerifier / DeterministicEngine / DeterministicTokenizer
79-
# test doubles. Engine-dependent modules — currently
80-
# ``inference_engine.session.coordinator``,
79+
# / _MinimalVerifierStub test doubles. Verifier-dependent
80+
# modules — ``inference_engine.session.coordinator``,
8181
# ``inference_engine.session.generator``,
8282
# ``inference_engine.scheduler.scheduler``,
83-
# ``inference_engine.server.app``,
84-
# ``inference_engine.server.engine``,
85-
# ``inference_engine.server.tokenizer`` — move to the
83+
# ``inference_engine.server.{app, engine, tokenizer, streaming}``,
84+
# ``kakeya.{client, session}`` — move to the
8685
# tests/integration/ suite, gated on Mac M4 / CUDA hosts.
8786
#
8887
# Coverage is invoked via ``coverage run -m pytest`` rather
8988
# than ``pytest --cov=`` to avoid a torch+pytest-cov race
90-
# at conftest-import time.
89+
# at conftest-import time on the hosted Linux runner.
9190
coverage run -m pytest \
9291
tests/inference_engine/server/ \
9392
tests/inference_engine/memory/ \
@@ -101,10 +100,10 @@ jobs:
101100
--junitxml=junit.xml \
102101
-v
103102
coverage report \
104-
--include='inference_engine/server/auth.py,inference_engine/server/config.py,inference_engine/server/errors.py,inference_engine/server/grpc_app.py,inference_engine/server/metrics.py,inference_engine/server/schemas.py,inference_engine/server/proto_gen/**/*.py,inference_engine/memory/*,inference_engine/scheduler/config.py,inference_engine/scheduler/session.py,inference_engine/scheduler/pooled_verifier.py,inference_engine/pipeline/*,inference_engine/session/store.py,sdks/python/kakeya/*,training/repr_align/*' \
103+
--include='inference_engine/server/auth.py,inference_engine/server/config.py,inference_engine/server/errors.py,inference_engine/server/grpc_app.py,inference_engine/server/metrics.py,inference_engine/server/schemas.py,inference_engine/server/proto_gen/**/*.py,inference_engine/memory/*,inference_engine/scheduler/config.py,inference_engine/scheduler/session.py,inference_engine/scheduler/pooled_verifier.py,inference_engine/pipeline/*,inference_engine/session/store.py,sdks/python/kakeya/__init__.py,sdks/python/kakeya/errors.py,training/repr_align/*' \
105104
--fail-under=100
106105
coverage xml -o coverage.xml \
107-
--include='inference_engine/server/auth.py,inference_engine/server/config.py,inference_engine/server/errors.py,inference_engine/server/grpc_app.py,inference_engine/server/metrics.py,inference_engine/server/schemas.py,inference_engine/server/proto_gen/**/*.py,inference_engine/memory/*,inference_engine/scheduler/config.py,inference_engine/scheduler/session.py,inference_engine/scheduler/pooled_verifier.py,inference_engine/pipeline/*,inference_engine/session/store.py,sdks/python/kakeya/*,training/repr_align/*'
106+
--include='inference_engine/server/auth.py,inference_engine/server/config.py,inference_engine/server/errors.py,inference_engine/server/grpc_app.py,inference_engine/server/metrics.py,inference_engine/server/schemas.py,inference_engine/server/proto_gen/**/*.py,inference_engine/memory/*,inference_engine/scheduler/config.py,inference_engine/scheduler/session.py,inference_engine/scheduler/pooled_verifier.py,inference_engine/pipeline/*,inference_engine/session/store.py,sdks/python/kakeya/__init__.py,sdks/python/kakeya/errors.py,training/repr_align/*'
108107
109108
- name: Upload coverage artifact
110109
if: always()

scripts/review_pr_n4_on_mac.sh

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#!/usr/bin/env bash
# Mac M4 review aid for PR-N4 (no-test-doubles cleanup, FINAL).
#
# PR-N4 retires the last verifier-protocol stand-in: the
# ``_MinimalVerifierStub`` (formerly ``FakeVerifier`` import) in
# ``tests/sdk/python/conftest.py``. The SDK transport tests
# (Client + Session) move to ``tests/integration/test_sdk_real.py``
# where they run against a real Qwen3-0.6B-backed gRPC runtime.
#
# After PR-N4: NO test doubles remain in the Linux test tree
# implementing the verifier / engine / tokenizer protocols. The
# Linux CI gate covers ONLY truly verifier-independent code; the
# integration suite is the binding gate for runtime correctness.
#
# Produces 2 files (both matched by the ``git add`` glob below):
#
#   results/platform-tests/pr-n4-mac-integration-tests-<unix>.junit.xml
#       raw JUnit XML written by pytest.
#   results/platform-tests/pr-n4-mac-integration-tests-<unix>.json
#       summary of ``pytest -m integration tests/integration/`` — the
#       full accumulated integration suite (PR-E1 INV-3 + PR-N1
#       coordinator/generator + PR-N2 scheduler + PR-N3
#       http_shim/engine/tokenizer/streaming + PR-N4 SDK).
#
# The JSON summary is written even when tests fail, so that failing
# runs leave evidence too; the script then exits with pytest's status.
#
# Usage (from repo root, on Mac M4):
#
#   bash scripts/review_pr_n4_on_mac.sh
#
# Then commit:
#
#   git add results/platform-tests/pr-n4-mac-*
#   git commit -m "Mac M4 review evidence for PR-N4"
#   git push

set -euo pipefail

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"

stamp="$(date +%s)"
out_dir="results/platform-tests"
mkdir -p "$out_dir"

junit="$out_dir/pr-n4-mac-integration-tests-${stamp}.junit.xml"
report="$out_dir/pr-n4-mac-integration-tests-${stamp}.json"

echo "==> integration suite (full accumulated PR-N1..N4 + PR-E1 GA gate)"
# Capture pytest's status instead of letting ``set -e`` abort here:
# a failing run must still reach the report step below, otherwise the
# failure/error counters in the JSON could never be non-zero.
pytest_rc=0
PYTHONPATH=.:sdks/python python3 -m pytest \
  -m integration \
  tests/integration/ \
  --junitxml="$junit" \
  -v || pytest_rc=$?

# Without a JUnit file there is nothing to summarise (e.g. pytest
# crashed or was invoked incorrectly); fail loudly rather than let the
# XML parser below die with an unrelated traceback.
if [[ ! -s "$junit" ]]; then
  echo "error: pytest exited with status ${pytest_rc} without writing ${junit}" >&2
  exit "$(( pytest_rc != 0 ? pytest_rc : 1 ))"
fi

PYTHONPATH=.:sdks/python python3 - "$junit" "$report" <<'PY'
import json
import platform
import sys
import xml.etree.ElementTree as ET

junit_path, out_path = sys.argv[1:3]
jr = ET.parse(junit_path).getroot()
# Sum the counters over every <testsuite> element in the JUnit file.
testsuites = list(jr.iter("testsuite"))
total_tests = sum(int(ts.get("tests", "0")) for ts in testsuites)
total_failures = sum(int(ts.get("failures", "0")) for ts in testsuites)
total_errors = sum(int(ts.get("errors", "0")) for ts in testsuites)
total_skipped = sum(int(ts.get("skipped", "0")) for ts in testsuites)
report = {
    "schema_version": 1,
    "kind": "pr_n4_mac_integration_tests",
    "host": {
        "platform": platform.platform(),
        "machine": platform.machine(),
        "python": platform.python_version(),
    },
    "junit": {
        "tests": total_tests, "failures": total_failures,
        "errors": total_errors, "skipped": total_skipped,
    },
}
with open(out_path, "w", encoding="utf-8") as fh:
    json.dump(report, fh, indent=2)
print(f"  -> {out_path}")
PY

echo
echo "==> Done. Commit:"
echo "    git add $out_dir/pr-n4-mac-*"
echo "    git commit -m 'Mac M4 review evidence for PR-N4'"
echo "    git push"

# Propagate the test outcome only after the evidence has been written.
if [[ "$pytest_rc" -ne 0 ]]; then
  echo "==> pytest exited with status ${pytest_rc}; see ${report} for the counts." >&2
  exit "$pytest_rc"
fi

tests/integration/conftest.py

Lines changed: 118 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
11
"""Shared fixtures and marker plumbing for the integration suite.
22
33
Tests under ``tests/integration/`` exercise the v0.3 runtime against
4-
**real** model weights — the same Qwen3-0.6B verifier used by
5-
``tests/core/``. They are NOT part of the Linux unit-test gate
6-
(coverage is platform-neutral; loading real weights is HF-cache- and
7-
hardware-bound), and are NOT auto-discovered by a bare ``pytest``
8-
invocation: every test in this directory carries the
9-
``@pytest.mark.integration`` marker, and you opt in with::
10-
11-
pytest -m integration tests/integration/
12-
13-
Per ADR 0008 §9, this suite is the binding GA gate. PR-E2 (a future
14-
PR) will add a self-hosted Mac M4 GitHub Actions workflow that runs
15-
``pytest -m integration`` on every PR labelled ``needs-mac-m4``;
16-
until that workflow lands, contributors run the suite manually on
17-
Mac M4 and push the resulting JSON / JUnit reports to the PR branch.
4+
**real** model weights — typically Qwen3-0.6B from the HF cache.
5+
They are NOT part of the Linux unit-test gate (model loading is
6+
HF-cache- and hardware-bound) and are NOT auto-discovered by a bare
7+
``pytest``: every test in this directory gets the
8+
``@pytest.mark.integration`` marker auto-applied below, and you opt
9+
in with ``pytest -m integration tests/integration/``.
10+
11+
This conftest was created independently by PR-E1, PR-N1, PR-N2, PR-N3,
12+
and PR-N4 (they all branched off main while none had merged yet);
13+
the file content is the union and de-duplicates cleanly because each
14+
PR appends its own real-engine / real-runtime fixtures.
15+
16+
Per ADR 0008 §9: this suite is the binding GA gate. Mac M4 reviewer
17+
scripts (``scripts/review_pr_n*_on_mac.sh``) drive it manually
18+
until PR-E2 ships the self-hosted runner workflow.
1819
"""
1920

2021
from __future__ import annotations
@@ -80,3 +81,106 @@ def real_speculative_engine():
8081
tokenizer=verifier.tokenizer,
8182
model_id_label="kakeya-integration",
8283
)
84+
85+
86+
# ---------------------------------------------------------------------------
87+
# Real gRPC runtime fixture — used by PR-N4's SDK integration tests.
88+
# An in-process gRPC server backed by a real verifier on a background
89+
# thread, yielding the host:port string the SDK can connect to.
90+
# ---------------------------------------------------------------------------
91+
92+
93+
@pytest.fixture(scope="session")
def real_grpc_runtime_address():
    """Run an in-process gRPC ``RuntimeService`` on a background thread.

    The service is backed by a real :class:`SinkWindowVerifier`
    configured for ``Qwen/Qwen3-0.6B`` (bfloat16, CPU). The server binds
    an ephemeral port on ``127.0.0.1``.

    Session-scoped, so the verifier is constructed once per test session
    and shared by every test that requests this fixture (the original
    author's note puts model load at roughly 3-5 s on CPU).

    NOTE(review): the original docstring states the shared verifier is
    reset on each ``prefill`` call — confirm against
    ``SinkWindowVerifier`` before relying on cross-test isolation.

    Yields:
        The ``host:port`` address string the SDK can connect to.

    Raises:
        RuntimeError: if the background server fails during startup
            (the underlying exception is chained as ``__cause__``) or
            does not report ready within 15 seconds.
    """
    import asyncio
    import logging
    import threading
    import time

    import grpc
    import torch

    from inference_engine.server.grpc_app import RuntimeServiceServicer
    from inference_engine.server.proto_gen.kakeya.v1 import (
        runtime_pb2_grpc,
    )
    from inference_engine.session import (
        AppendTokensCoordinator,
        GenerationCoordinator,
        SessionStore,
    )
    from kv_cache_proposer.verifier import SinkWindowVerifier, VerifierConfig

    log = logging.getLogger(__name__)

    verifier_cfg = VerifierConfig(
        model_id="Qwen/Qwen3-0.6B",
        dtype=torch.bfloat16, device="cpu",
        sink_size=4, window_size=64,
    )
    verifier = SinkWindowVerifier(verifier_cfg)
    store = SessionStore(capacity=4, cache_inspector=verifier)
    append_coord = AppendTokensCoordinator(store, verifier)
    gen_coord = GenerationCoordinator(store, verifier)

    loop = asyncio.new_event_loop()
    # Shared state between the worker thread and the fixture body.
    holder: dict = {
        "server": None,
        "port": None,
        "error": None,
        "started": threading.Event(),
    }

    async def _serve():
        # Build the server INSIDE the worker thread's loop so any
        # internal asyncio.Future is bound to this loop, not the
        # main-thread default loop (the "Future attached to a
        # different loop" failure PR-B4 hit).
        server = grpc.aio.server()
        runtime_pb2_grpc.add_RuntimeServiceServicer_to_server(
            RuntimeServiceServicer(
                store,
                append_coordinator=append_coord,
                generation_coordinator=gen_coord,
            ),
            server,
        )
        holder["server"] = server
        port = server.add_insecure_port("127.0.0.1:0")
        if not port:
            # Some grpcio versions signal a failed bind by returning 0
            # instead of raising; never hand out "127.0.0.1:0".
            raise RuntimeError("could not bind gRPC runtime to 127.0.0.1")
        holder["port"] = port
        await server.start()
        holder["started"].set()
        await server.wait_for_termination()

    def _run():
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(_serve())
        except Exception as exc:
            holder["error"] = exc
            if holder["started"].is_set():
                # Startup had succeeded, so nobody is waiting to
                # re-raise this; make the crash visible in the logs.
                log.exception("background gRPC runtime crashed after startup")
        finally:
            # Always wake the waiter so a startup failure is reported
            # immediately rather than after the full timeout.
            holder["started"].set()

    thread = threading.Thread(target=_run, daemon=True)
    thread.start()
    started = holder["started"].wait(timeout=15.0)
    if holder["error"] is not None:
        raise RuntimeError(
            "background gRPC runtime failed to start",
        ) from holder["error"]
    if not started:
        raise RuntimeError(
            "background gRPC runtime failed to start within 15s",
        )

    address = f"127.0.0.1:{holder['port']}"
    try:
        yield address
    finally:
        async def _shutdown():
            await holder["server"].stop(grace=0.1)

        # Teardown is best-effort: problems are logged, never raised,
        # so they cannot mask the outcome of the tests themselves.
        try:
            fut = asyncio.run_coroutine_threadsafe(_shutdown(), loop)
            fut.result(timeout=2.0)
        except Exception:  # pragma: no cover - best-effort cleanup
            log.warning("gRPC runtime shutdown failed", exc_info=True)
        thread.join(timeout=2.0)
        time.sleep(0.05)
        try:
            loop.close()
        except Exception:  # pragma: no cover - best-effort cleanup
            log.warning("could not close gRPC runtime loop", exc_info=True)

0 commit comments

Comments
 (0)