1- """Shared fixtures for scheduler tests.
2-
3- Defines local copies of the deterministic test doubles
4- (``DeterministicTokenizer``, ``DeterministicEngine``) so this branch
5- can be tested independently of the E2 server branch. When both land,
6- a follow-up commit consolidates them into a single shared location.
7-
8- These are real concrete classes — not ``unittest.mock`` objects.
1+ """Shared fixtures for the verifier-independent scheduler tests.
2+
3+ PR-N2 retired the ``DeterministicEngine`` + ``DeterministicTokenizer``
4+ test doubles that previously lived here. The scheduler's runtime
5+ behavior — admission control, lifecycle, cancellation, concurrency,
6+ shutdown — moved to ``tests/integration/test_scheduler_real.py``
7+ where it runs against a real ``SpeculativeEngine`` over Qwen3-0.6B.
8+
9+ What stays on Linux: the slab-pool fixtures (verifier-independent;
10+ they describe storage shape, not model behavior). They're consumed by
11+ ``test_scheduler_validation.py`` (argument validation paths that
12+ reject before the engine is touched).
13+
14+ The previously co-located ``test_pooled_verifier.py`` is intentionally
15+ left in place with its own ``_FakeVerifier`` because PR-D2 retires
16+ the ``PooledVerifier`` module entirely (HTTP shim refactor onto
17+ ``SessionStore``); cleaning up the test file before the module
18+ disappears would be throwaway work.
919"""
1020
1121from __future__ import annotations
1222
13- from typing import Any , Callable , List , Optional
14-
1523import pytest
1624import torch
1725
1826from inference_engine .memory .pool import SlabPool
1927from inference_engine .memory .slab import SlabConfig
20- from inference_engine .scheduler .config import AdmissionPolicy , SchedulerConfig
21- from inference_engine .scheduler .scheduler import Scheduler
22-
23-
24- # ---------------------------------------------------------------------------
25- # Test doubles (local copies; identical behaviour to E2's versions)
26- # ---------------------------------------------------------------------------
27-
28-
class DeterministicTokenizer:
    """Small word-to-id tokenizer double shaped like an HF ``AutoTokenizer``.

    Two special tokens are registered up front: ``<|im_end|>`` (id 0, used
    as the EOS token) and ``<|unk|>`` (id 1). Any other word gets the next
    free id the first time it is interned.
    """

    def __init__(self) -> None:
        specials = ("<|im_end|>", "<|unk|>")
        # Forward and reverse tables are kept in lockstep by ``_intern``.
        self._token_to_id: dict[str, int] = {
            text: idx for idx, text in enumerate(specials)
        }
        self._id_to_token: dict[int, str] = dict(enumerate(specials))
        self.eos_token_id: Optional[int] = 0
        self.unk_token_id: Optional[int] = 1

    def _intern(self, word: str) -> int:
        """Return the id of ``word``, assigning a fresh id if it is unseen."""
        known = self._token_to_id.get(word)
        if known is not None:
            return known
        # Ids are dense, so the current table size is the next free id.
        fresh = len(self._token_to_id)
        self._token_to_id[word] = fresh
        self._id_to_token[fresh] = word
        return fresh

    def apply_chat_template(  # pragma: no cover - unused by scheduler tests
        self, *args, **kwargs
    ) -> Any:
        """Not supported by this double; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def decode(  # pragma: no cover - unused by scheduler tests
        self, token_ids, *, skip_special_tokens=False
    ):
        """Not supported by this double; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def convert_tokens_to_ids(  # pragma: no cover - unused by scheduler tests
        self, token: str
    ) -> Optional[int]:
        """Return the id registered for ``token``, or ``None`` if unknown."""
        try:
            return self._token_to_id[token]
        except KeyError:
            return None
59-
60-
class DeterministicEngine:
    """Engine test double that replays a fixed token sequence.

    Each ``generate`` call walks ``fixed_tokens`` from the start, stopping
    at the token budget, when the ``on_token`` callback asks to stop, or
    after emitting an EOS token, whichever comes first.
    """

    class _Result:
        """Lightweight result struct returned by ``generate``.

        Mirrors only the ``SpeculativeDecoder.GenerationResult`` fields the
        scheduler reads. Defined once on the class so every call returns
        the same type instead of a freshly created class per call.
        """

        def __init__(self, output_token_ids: List[int]) -> None:
            self.output_token_ids = output_token_ids
            self.acceptance_rate = 1.0
            self.proposer_forward_calls = len(output_token_ids)
            self.verifier_forward_calls = len(output_token_ids)

    def __init__(
        self,
        fixed_tokens: List[int],
        tokenizer: DeterministicTokenizer,
        model_id_label: str = "kakeya-test",
        per_token_delay_s: float = 0.0,
    ) -> None:
        """Store the replay script and its settings.

        Args:
            fixed_tokens: Token ids to replay; copied, must be non-empty.
            tokenizer: Tokenizer exposed through the ``tokenizer`` property.
            model_id_label: Label exposed through ``model_id_label``.
            per_token_delay_s: Seconds to sleep before each emitted token.

        Raises:
            ValueError: If ``fixed_tokens`` is empty or the delay is negative.
        """
        if not fixed_tokens:
            raise ValueError("fixed_tokens must be non-empty")
        if per_token_delay_s < 0:
            raise ValueError("per_token_delay_s must be >= 0")
        self._fixed_tokens = list(fixed_tokens)
        self._tokenizer = tokenizer
        self._model_id_label = model_id_label
        self._per_token_delay_s = per_token_delay_s

    @property
    def tokenizer(self) -> DeterministicTokenizer:
        """The tokenizer supplied at construction."""
        return self._tokenizer

    @property
    def model_id_label(self) -> str:
        """The model label supplied at construction."""
        return self._model_id_label

    def generate(
        self,
        prompt_ids: List[int],
        max_new_tokens: int,
        eos_token_ids: List[int],
        on_token: Optional[Callable[[int], bool]] = None,
    ) -> _Result:
        """Replay the fixed token script and return what was emitted.

        Args:
            prompt_ids: Prompt token ids; only checked for non-emptiness.
            max_new_tokens: Upper bound on emitted tokens; must be positive.
            eos_token_ids: Ids that end generation once emitted.
            on_token: Optional callback invoked with each emitted token;
                a truthy return value stops generation.

        Returns:
            A result object whose ``output_token_ids`` holds the emitted
            tokens, including any terminating EOS token.

        Raises:
            ValueError: If ``prompt_ids`` or ``eos_token_ids`` is empty, or
                ``max_new_tokens`` is not positive.
        """
        if not prompt_ids:
            raise ValueError("prompt_ids must be non-empty")
        if max_new_tokens <= 0:
            raise ValueError(
                f"max_new_tokens must be positive, got {max_new_tokens}"
            )
        if not eos_token_ids:
            raise ValueError("eos_token_ids must be non-empty")

        # Imported once per call rather than on every loop iteration.
        import time

        delay_s = self._per_token_delay_s
        eos_set = {int(i) for i in eos_token_ids}
        emitted: List[int] = []
        for raw_tok in self._fixed_tokens:
            if len(emitted) >= max_new_tokens:
                break
            if delay_s > 0:
                time.sleep(delay_s)
            tok = int(raw_tok)
            emitted.append(tok)
            # The callback sees the token before the EOS check, so an EOS
            # token is still reported to ``on_token``.
            if on_token is not None and on_token(tok):
                break
            if tok in eos_set:
                break

        return self._Result(emitted)
128-
129-
130- # ---------------------------------------------------------------------------
131- # Pytest fixtures
132- # ---------------------------------------------------------------------------
13328
13429
13530@pytest .fixture
@@ -148,59 +43,3 @@ def small_pool(slab_config: SlabConfig) -> SlabPool:
@pytest.fixture
def single_pool(slab_config: SlabConfig) -> SlabPool:
    """Provide a ``SlabPool`` built from ``slab_config`` with ``num_slabs=1``."""
    pool = SlabPool(slab_config=slab_config, num_slabs=1)
    return pool
151-
152-
@pytest.fixture
def tokenizer() -> DeterministicTokenizer:
    """Provide a newly constructed ``DeterministicTokenizer``."""
    fresh_tokenizer = DeterministicTokenizer()
    return fresh_tokenizer
156-
157-
@pytest.fixture
def short_engine(tokenizer: DeterministicTokenizer) -> DeterministicEngine:
    """Provide an engine that replays ``hello``, ``world``, ``!``, then EOS."""
    # Interning order matters: ids are handed out in first-seen order.
    word_ids = [tokenizer._intern(word) for word in ("hello", "world", "!")]
    script = [*word_ids, tokenizer.eos_token_id]
    return DeterministicEngine(tokenizer=tokenizer, fixed_tokens=script)
167-
168-
@pytest.fixture
def long_engine(tokenizer: DeterministicTokenizer) -> DeterministicEngine:
    """Provide an engine labelled ``long`` that replays 50 interned tokens."""
    words = (f"tok{i}" for i in range(50))
    script = list(map(tokenizer._intern, words))
    return DeterministicEngine(
        model_id_label="long",
        tokenizer=tokenizer,
        fixed_tokens=script,
    )
175-
176-
@pytest.fixture
def slow_engine(tokenizer: DeterministicTokenizer) -> DeterministicEngine:
    """Provide an engine labelled ``slow``: 20 tokens, 0.01 s delay per token."""
    words = (f"slow{i}" for i in range(20))
    script = list(map(tokenizer._intern, words))
    return DeterministicEngine(
        per_token_delay_s=0.01,
        model_id_label="slow",
        tokenizer=tokenizer,
        fixed_tokens=script,
    )
184-
185-
@pytest.fixture
def reject_scheduler(short_engine, small_pool):
    """Provide a ``Scheduler`` configured with ``AdmissionPolicy.REJECT``.

    The scheduler wraps ``short_engine`` and ``small_pool``, and
    ``max_concurrent`` is set to the pool's ``total_count``.
    """
    reject_config = SchedulerConfig(
        admission_policy=AdmissionPolicy.REJECT,
        max_concurrent=small_pool.total_count,
    )
    return Scheduler(config=reject_config, pool=small_pool, engine=short_engine)
195-
196-
@pytest.fixture
def queue_scheduler(short_engine, small_pool):
    """Provide a ``Scheduler`` configured with ``AdmissionPolicy.QUEUE``.

    The scheduler wraps ``short_engine`` and ``small_pool``;
    ``max_concurrent`` is set to the pool's ``total_count`` and
    ``queue_max_wait_s`` to 2.0.
    """
    queue_config = SchedulerConfig(
        queue_max_wait_s=2.0,
        admission_policy=AdmissionPolicy.QUEUE,
        max_concurrent=small_pool.total_count,
    )
    return Scheduler(config=queue_config, pool=small_pool, engine=short_engine)
0 commit comments