FluffyAIcode
diff --git a/‎.github/workflows/ci.yaml‎
Lines changed: 16 additions & 11 deletions b/‎.github/workflows/ci.yaml‎
Lines changed: 16 additions & 11 deletions
diff --git a/‎scripts/review_pr_n2_on_mac.sh‎
Lines changed: 88 additions & 0 deletions b/‎scripts/review_pr_n2_on_mac.sh‎
Lines changed: 88 additions & 0 deletions
diff --git a/‎tests/inference_engine/scheduler/conftest.py‎
Lines changed: 18 additions & 179 deletions b/‎tests/inference_engine/scheduler/conftest.py‎
Lines changed: 18 additions & 179 deletions
@@ -72,7 +72,17 @@ jobs:
           # PYTHONPATH route avoids a setuptools build step in CI.
           PYTHONPATH: .:sdks/python
         run: |
-          pytest \
+          # PR-N2 (ADR 0008) cleanup: this gate covers ONLY
+          # verifier-independent code. The Linux runner cannot load
+          # real Qwen3 weights; PR-N2 retired the DeterministicEngine
+          # + DeterministicTokenizer test doubles that previously
+          # stood in for them. Engine / scheduler runtime tests
+          # moved to tests/integration/ (Mac M4 / CUDA gate).
+          #
+          # Coverage is invoked via ``coverage run -m pytest`` not
+          # ``pytest --cov=`` to avoid a torch+pytest-cov race at
+          # conftest-import time on the hosted Linux runner.
+          coverage run -m pytest \
             tests/inference_engine/server/ \
             tests/inference_engine/memory/ \
             tests/inference_engine/scheduler/ \
@@ -81,18 +91,13 @@ jobs:
             tests/sdk/python/ \
             tests/training/repr_align/ \
             tests/backends/mlx/test_env.py \
-            --cov=inference_engine.server \
-            --cov=inference_engine.memory \
-            --cov=inference_engine.scheduler \
-            --cov=inference_engine.pipeline \
-            --cov=inference_engine.session \
-            --cov=kakeya \
-            --cov=training.repr_align \
-            --cov-report=term \
-            --cov-report=xml:coverage.xml \
-            --cov-fail-under=100 \
             --junitxml=junit.xml \
             -v
+          # Emit the XML before the gate: a --fail-under miss stops the step.
+          coverage xml -o coverage.xml \
+            --include='inference_engine/server/*,inference_engine/memory/*,inference_engine/scheduler/config.py,inference_engine/scheduler/session.py,inference_engine/scheduler/pooled_verifier.py,inference_engine/pipeline/*,inference_engine/session/store.py,sdks/python/kakeya/*,training/repr_align/*'
+          coverage report --fail-under=100 \
+            --include='inference_engine/server/*,inference_engine/memory/*,inference_engine/scheduler/config.py,inference_engine/scheduler/session.py,inference_engine/scheduler/pooled_verifier.py,inference_engine/pipeline/*,inference_engine/session/store.py,sdks/python/kakeya/*,training/repr_align/*'
 
       - name: Upload coverage artifact
         if: always()
 
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+# Mac M4 review aid for PR-N2 (no-test-doubles cleanup, scope =
+# DeterministicEngine + DeterministicTokenizer in scheduler/conftest.py
+# + the test_scheduler.py tests that depended on them).
+#
+# PR-N2 retired the scheduler/-side ``DeterministicEngine`` and
+# ``DeterministicTokenizer`` test doubles. Their dispatch /
+# admission-control / lifecycle tests moved to
+# tests/integration/test_scheduler_real.py, where they run against
+# the real ``SpeculativeEngine`` over Qwen3-0.6B.
+#
+# The HTTP shim's separate copy of these doubles (in
+# ``tests/inference_engine/server/conftest.py``) and the engine-
+# subtype doubles (``_RaisingEngine``, ``_ProxyEngine``, etc.) are
+# PR-N3 scope and remain in place on this branch.
+#
+# Produces 2 artifacts (JUnit XML + a JSON summary derived from it):
+#
+#   results/platform-tests/pr-n2-mac-integration-tests-<unix>.{junit.xml,json}
+#     pytest -m integration tests/integration/ (incl.
+#     test_scheduler_real.py) against real Qwen3-0.6B + SpeculativeEngine.
+#     Acceptance: all pass; structural invariants hold (state transitions,
+#     slab acquire/release, admission control, concurrency).
+#
+# Usage (from repo root, on Mac M4):
+#
+#     bash scripts/review_pr_n2_on_mac.sh
+#
+# Then commit:
+#
+#     git add results/platform-tests/pr-n2-mac-*
+#     git commit -m "Mac M4 review evidence for PR-N2"
+#     git push
+
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT"
+
+stamp="$(date +%s)"
+out_dir="results/platform-tests"
+mkdir -p "$out_dir"
+
+junit="$out_dir/pr-n2-mac-integration-tests-${stamp}.junit.xml"
+report="$out_dir/pr-n2-mac-integration-tests-${stamp}.json"
+
+echo "==> integration suite (PR-N2 migrated scheduler tests + INV-3 GA gate)"
+# Defer pytest's exit status: under `set -e` a failing run would abort
+# here and the JSON summary (which records failures) would never exist.
+pytest_rc=0
+PYTHONPATH=.:sdks/python python3 -m pytest \
+    -m integration \
+    tests/integration/ \
+    --junitxml="$junit" \
+    -v || pytest_rc=$?
+
+PYTHONPATH=.:sdks/python python3 - "$junit" "$report" <<'PY'
+import json
+import platform
+import sys
+import xml.etree.ElementTree as ET
+junit_path, out_path = sys.argv[1:3]
+suites = list(ET.parse(junit_path).getroot().iter("testsuite"))
+report = {
+    "schema_version": 1,
+    "kind": "pr_n2_mac_integration_tests",
+    "host": {
+        "platform": platform.platform(),
+        "machine": platform.machine(),
+        "python": platform.python_version(),
+    },
+    "junit": {
+        key: sum(int(ts.get(key, "0")) for ts in suites)
+        for key in ("tests", "failures", "errors", "skipped")
+    },
+}
+with open(out_path, "w", encoding="utf-8") as fh:
+    json.dump(report, fh, indent=2)
+print(f"  -> {out_path}")
+PY
+
+echo
+echo "==> Done. Commit:"
+echo "    git add $out_dir/pr-n2-mac-*"
+echo "    git commit -m 'Mac M4 review evidence for PR-N2'"
+echo "    git push"
+# Propagate the deferred pytest status so a failing suite still fails here.
+exit "$pytest_rc"
@@ -1,135 +1,30 @@
-"""Shared fixtures for scheduler tests.
-
-Defines local copies of the deterministic test doubles
-(``DeterministicTokenizer``, ``DeterministicEngine``) so this branch
-can be tested independently of the E2 server branch. When both land,
-a follow-up commit consolidates them into a single shared location.
-
-These are real concrete classes — not ``unittest.mock`` objects.
+"""Shared fixtures for the verifier-independent scheduler tests.
+
+PR-N2 retired the ``DeterministicEngine`` + ``DeterministicTokenizer``
+test doubles that previously lived here. The scheduler's runtime
+behavior — admission control, lifecycle, cancellation, concurrency,
+shutdown — moved to ``tests/integration/test_scheduler_real.py``
+where it runs against a real ``SpeculativeEngine`` over Qwen3-0.6B.
+
+What stays on Linux: the slab-pool fixtures (verifier-independent;
+they describe storage shape, not model behavior). They're consumed by
+``test_scheduler_validation.py`` (argument validation paths that
+reject before the engine is touched).
+
+The previously co-located ``test_pooled_verifier.py`` is intentionally
+left in place with its own ``_FakeVerifier`` because PR-D2 retires
+the ``PooledVerifier`` module entirely (HTTP shim refactor onto
+``SessionStore``); cleaning up the test file before the module
+disappears would be throwaway work.
 """
 
 from __future__ import annotations
 
-from typing import Any, Callable, List, Optional
-
 import pytest
 import torch
 
 from inference_engine.memory.pool import SlabPool
 from inference_engine.memory.slab import SlabConfig
-from inference_engine.scheduler.config import AdmissionPolicy, SchedulerConfig
-from inference_engine.scheduler.scheduler import Scheduler
-
-
-# ---------------------------------------------------------------------------
-# Test doubles (local copies; identical behaviour to E2's versions)
-# ---------------------------------------------------------------------------
-
-
-class DeterministicTokenizer:
-    """Minimal HF-AutoTokenizer-shaped tokenizer; word-id mapping."""
-
-    def __init__(self) -> None:
-        self._token_to_id: dict[str, int] = {"<|im_end|>": 0, "<|unk|>": 1}
-        self._id_to_token: dict[int, str] = {0: "<|im_end|>", 1: "<|unk|>"}
-        self.eos_token_id: Optional[int] = 0
-        self.unk_token_id: Optional[int] = 1
-
-    def _intern(self, word: str) -> int:
-        if word not in self._token_to_id:
-            new_id = len(self._token_to_id)
-            self._token_to_id[word] = new_id
-            self._id_to_token[new_id] = word
-        return self._token_to_id[word]
-
-    def apply_chat_template(  # pragma: no cover - unused by scheduler tests
-        self, *args, **kwargs
-    ) -> Any:
-        raise NotImplementedError
-
-    def decode(  # pragma: no cover - unused by scheduler tests
-        self, token_ids, *, skip_special_tokens=False
-    ):
-        raise NotImplementedError
-
-    def convert_tokens_to_ids(  # pragma: no cover - unused by scheduler tests
-        self, token: str
-    ) -> Optional[int]:
-        return self._token_to_id.get(token)
-
-
-class DeterministicEngine:
-    """Engine test double emitting a fixed token sequence."""
-
-    def __init__(
-        self,
-        fixed_tokens: List[int],
-        tokenizer: DeterministicTokenizer,
-        model_id_label: str = "kakeya-test",
-        per_token_delay_s: float = 0.0,
-    ) -> None:
-        if not fixed_tokens:
-            raise ValueError("fixed_tokens must be non-empty")
-        if per_token_delay_s < 0:
-            raise ValueError("per_token_delay_s must be >= 0")
-        self._fixed_tokens = list(fixed_tokens)
-        self._tokenizer = tokenizer
-        self._model_id_label = model_id_label
-        self._per_token_delay_s = per_token_delay_s
-
-    @property
-    def tokenizer(self) -> DeterministicTokenizer:
-        return self._tokenizer
-
-    @property
-    def model_id_label(self) -> str:
-        return self._model_id_label
-
-    def generate(
-        self,
-        prompt_ids: List[int],
-        max_new_tokens: int,
-        eos_token_ids: List[int],
-        on_token: Optional[Callable[[int], bool]] = None,
-    ):
-        if not prompt_ids:
-            raise ValueError("prompt_ids must be non-empty")
-        if max_new_tokens <= 0:
-            raise ValueError(
-                f"max_new_tokens must be positive, got {max_new_tokens}"
-            )
-        if not eos_token_ids:
-            raise ValueError("eos_token_ids must be non-empty")
-        eos_set = set(int(i) for i in eos_token_ids)
-        emitted: List[int] = []
-        for tok in self._fixed_tokens:
-            if len(emitted) >= max_new_tokens:
-                break
-            if self._per_token_delay_s > 0:
-                import time
-                time.sleep(self._per_token_delay_s)
-            emitted.append(int(tok))
-            if on_token is not None and on_token(int(tok)):
-                break
-            if int(tok) in eos_set:
-                break
-
-        # Lightweight result struct identical to what
-        # SpeculativeDecoder.GenerationResult exposes (only the fields
-        # the scheduler actually reads).
-        class _Result:
-            def __init__(self, output_token_ids):
-                self.output_token_ids = output_token_ids
-                self.acceptance_rate = 1.0
-                self.proposer_forward_calls = len(output_token_ids)
-                self.verifier_forward_calls = len(output_token_ids)
-
-        return _Result(emitted)
-
-
-# ---------------------------------------------------------------------------
-# Pytest fixtures
-# ---------------------------------------------------------------------------
 
 
 @pytest.fixture
@@ -148,59 +43,3 @@ def small_pool(slab_config: SlabConfig) -> SlabPool:
 @pytest.fixture
 def single_pool(slab_config: SlabConfig) -> SlabPool:
     return SlabPool(num_slabs=1, slab_config=slab_config)
-
-
-@pytest.fixture
-def tokenizer() -> DeterministicTokenizer:
-    return DeterministicTokenizer()
-
-
-@pytest.fixture
-def short_engine(tokenizer: DeterministicTokenizer) -> DeterministicEngine:
-    hello = tokenizer._intern("hello")
-    world = tokenizer._intern("world")
-    bang = tokenizer._intern("!")
-    return DeterministicEngine(
-        fixed_tokens=[hello, world, bang, tokenizer.eos_token_id],
-        tokenizer=tokenizer,
-    )
-
-
-@pytest.fixture
-def long_engine(tokenizer: DeterministicTokenizer) -> DeterministicEngine:
-    ids = [tokenizer._intern(f"tok{i}") for i in range(50)]
-    return DeterministicEngine(
-        fixed_tokens=ids, tokenizer=tokenizer, model_id_label="long",
-    )
-
-
-@pytest.fixture
-def slow_engine(tokenizer: DeterministicTokenizer) -> DeterministicEngine:
-    ids = [tokenizer._intern(f"slow{i}") for i in range(20)]
-    return DeterministicEngine(
-        fixed_tokens=ids, tokenizer=tokenizer,
-        model_id_label="slow", per_token_delay_s=0.01,
-    )
-
-
-@pytest.fixture
-def reject_scheduler(short_engine, small_pool):
-    return Scheduler(
-        engine=short_engine, pool=small_pool,
-        config=SchedulerConfig(
-            max_concurrent=small_pool.total_count,
-            admission_policy=AdmissionPolicy.REJECT,
-        ),
-    )
-
-
-@pytest.fixture
-def queue_scheduler(short_engine, small_pool):
-    return Scheduler(
-        engine=short_engine, pool=small_pool,
-        config=SchedulerConfig(
-            max_concurrent=small_pool.total_count,
-            admission_policy=AdmissionPolicy.QUEUE,
-            queue_max_wait_s=2.0,
-        ),
-    )