@@ -106,98 +106,112 @@ def test_no_bounded_quantifiers_in_regex(self) -> None:
class TestConstrainedDecodingCache:
    """Test the caching logic for the Outlines generator.

    The tests drive ``GRPOTrainer._outlines_generator`` through the three
    states they rely on:

    - ``None``: the initial value on a fresh trainer.
    - ``False``: sentinel for a failed creation; the getter must return
      ``None``.
    - anything else: a cached generator that the getter returns unchanged.
    """

    @staticmethod
    def _require_outlines():
        """Return the ``outlines`` module, skipping the calling test if absent."""
        try:
            import outlines
        except ImportError:
            pytest.skip("outlines not installed")
        return outlines

    def test_generator_cache_starts_as_none(self) -> None:
        """A freshly constructed trainer has no cached generator."""
        config = TrainingConfig()
        trainer = GRPOTrainer(config)
        assert trainer._outlines_generator is None

    def test_failed_generator_returns_none(self) -> None:
        """When creation fails, subsequent calls return None."""
        config = TrainingConfig(constrained_decoding=True)
        trainer = GRPOTrainer(config)
        # Simulate a failed creation via the False sentinel.
        trainer._outlines_generator = False
        result = trainer._get_outlines_generator()
        assert result is None

    def test_successful_generator_returns_cached(self) -> None:
        """When creation succeeds, subsequent calls return the cached generator."""
        config = TrainingConfig(constrained_decoding=True)
        trainer = GRPOTrainer(config)
        # Simulate a successful creation with a stand-in object.
        trainer._outlines_generator = "mock_generator"
        result = trainer._get_outlines_generator()
        assert result == "mock_generator"

    def test_outlines_api_imports(self) -> None:
        """Verify the outlines API the trainer depends on is importable.

        The trainer uses:
        - outlines.from_transformers (model wrapper factory)
        - outlines.regex (constraint factory)
        - outlines.Generator (generation with constraints)
        """
        outlines = self._require_outlines()

        assert callable(outlines.from_transformers)
        assert callable(outlines.regex)
        assert callable(outlines.Generator)

    def test_outlines_regex_compiles(self) -> None:
        """Verify the action regex can be compiled by Outlines.

        This catches DFA state explosion (bounded quantifiers) and
        syntax errors in the regex.
        """
        outlines = self._require_outlines()

        # This should NOT raise — if it does, the regex is too complex
        constraint = outlines.regex(GRPOTrainer._ACTION_REGEX)
        assert constraint is not None

    def test_outlines_generator_api_contract(self) -> None:
        """Verify the Outlines Generator API contract the trainer depends on.

        Checks that:
        1. outlines.from_transformers has a ``model`` parameter plus a
           second (tokenizer/processor) parameter
        2. outlines.regex returns a constraint object
        3. outlines.Generator has a ``model`` parameter
        4. SteerableGenerator.__call__ accepts arbitrary keyword arguments
           (needed for ``max_new_tokens=N``)

        Does NOT load a real model (too slow for CI). Instead verifies
        the API signatures match what the trainer calls.
        """
        # Stdlib import kept out of the optional-dependency guard so a missing
        # ``outlines`` is the only thing that can trigger the skip.
        import inspect

        outlines = self._require_outlines()

        # 1. from_transformers signature
        sig = inspect.signature(outlines.from_transformers)
        params = list(sig.parameters.keys())
        assert "model" in params, (
            f"from_transformers missing 'model' param: {params}"
        )
        assert "tokenizer_or_processor" in params or len(params) >= 2, (
            f"from_transformers signature changed: {sig}"
        )

        # 2. regex returns something
        constraint = outlines.regex(r"DONE\(\)")
        assert constraint is not None

        # 3. Generator signature
        sig_gen = inspect.signature(outlines.Generator)
        params_gen = list(sig_gen.parameters.keys())
        assert "model" in params_gen, (
            f"Generator missing 'model' param: {params_gen}"
        )

        # 4. SteerableGenerator.__call__ accepts **inference_kwargs
        from outlines.generator import SteerableGenerator

        sig_call = inspect.signature(SteerableGenerator.__call__)
        # Signature keys are bare names (never prefixed with "**"), so the only
        # reliable test for a **kwargs catch-all is the parameter kind.
        accepts_var_kwargs = any(
            param.kind is inspect.Parameter.VAR_KEYWORD
            for param in sig_call.parameters.values()
        )
        assert accepts_var_kwargs, (
            f"SteerableGenerator.__call__ doesn't accept **kwargs: {sig_call}"
        )

    def test_false_sentinel_not_confused_with_none(self) -> None:
        """Regression: False sentinel must return None, not be treated as uninitialized."""
        config = TrainingConfig(constrained_decoding=True)
        trainer = GRPOTrainer(config)
        trainer._outlines_generator = False
        assert trainer._get_outlines_generator() is None
        # A real generator object should be returned as-is
        trainer._outlines_generator = "real_generator"
        assert trainer._get_outlines_generator() == "real_generator"
202216
203217# ---------------------------------------------------------------------------
0 commit comments