fix: use Outlines Generator API instead of logits_processor kwarg (#204)

abrichr · claude · web-flow · commit aca70a2e6fec · 2026-03-28T15:53:59.000-04:00
Outlines v1.2 does NOT work by passing a processor to
model.generate(logits_processor=[...]). It uses its own Generator:

    model = outlines.from_transformers(hf_model, hf_processor)
    gen = outlines.Generator(model, outlines.regex(pattern))
    output = gen(prompt, max_new_tokens=512)

The Generator wraps the model and handles tokenization, constrained
generation, and decoding internally. Prior approach compiled the
processor successfully but it was never actually applied to generation.

Also fixes max_tokens → max_new_tokens (transformers kwarg name).

Tests (35, all pass in 0.09s):
- test_outlines_api_imports: verifies from_transformers, regex, Generator
- test_outlines_regex_compiles: verifies action regex compiles
- test_outlines_generator_api_contract: verifies Generator and
  SteerableGenerator signatures match what the trainer calls
- No slow model download — API contract checks only

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/openadapt_evals/training/standalone/trainer.py b/openadapt_evals/training/standalone/trainer.py
@@ -110,68 +110,55 @@ def __init__(
     _ACTION_REGEX = (
         r"Thought: [^\n]+\nAction: (" + _ACTION_RE + r")"
     )
-    # Sentinel: None = not yet attempted, list = success, False = failed
-    _constrained_processor_cache: Any = None
+    # Tri-state cache for the Outlines Generator (built once, then reused):
+    # None = not yet attempted, False = creation failed, else the Generator
+    _outlines_generator: Any = None
 
-    def _get_constrained_logits_processor(self) -> list | None:
-        """Build an Outlines RegexLogitsProcessor for the action format.
+    def _get_outlines_generator(self) -> Any | None:
+        """Build an Outlines Generator for constrained generation.
 
-        Returns a ``[LogitsProcessor]`` list suitable for passing to
-        ``model.generate(logits_processor=...)``, or ``None`` if Outlines
-        is not installed or compilation fails.
+        Outlines v1.2 uses its own Generator API — NOT model.generate()
+        with a logits_processor kwarg.  The Generator wraps the model and
+        handles tokenization, generation, and decoding internally.
 
-        The processor is cached after first creation (the DFA compilation
-        is expensive — ~2 seconds — but only happens once).
+        Returns the cached Generator, or None if creation failed (also cached).
         """
-        # Already attempted and failed
-        if self._constrained_processor_cache is False:
+        if self._outlines_generator is False:
             return None
-        # Already compiled successfully
-        if isinstance(self._constrained_processor_cache, list):
-            return self._constrained_processor_cache
+        if self._outlines_generator is not None:
+            return self._outlines_generator
 
         try:
-            # Outlines v1.2+ API:
-            # 1. Wrap HF model+tokenizer in outlines.Transformers
-            # 2. Call get_regex_logits_processor(None, wrapped, regex)
-            # The processor is then passed to model.generate(logits_processor=[p])
-            from outlines import Transformers
-            from outlines.generator import get_regex_logits_processor
-
-            raw_tokenizer = (
-                self._processor.tokenizer
-                if hasattr(self._processor, "tokenizer")
-                else self._processor
-            )
-            wrapped_model = Transformers(self._model, raw_tokenizer)
-            processor = get_regex_logits_processor(
-                None,  # use default backend
-                wrapped_model,
-                self._ACTION_REGEX,
+            import outlines
+
+            wrapped_model = outlines.from_transformers(
+                self._model, self._processor,
             )
-            self._constrained_processor_cache = [processor]
+            constraint = outlines.regex(self._ACTION_REGEX)
+            generator = outlines.Generator(wrapped_model, constraint)
+
+            self._outlines_generator = generator
             logger.info(
                 "Outlines constrained decoding enabled "
-                "(regex compiled via %s, processor=%s)",
+                "(model=%s, regex compiled successfully)",
                 type(wrapped_model).__name__,
-                type(processor).__name__,
             )
-            return self._constrained_processor_cache
+            return generator
         except ImportError:
             logger.error(
                 "constrained_decoding=True but 'outlines' is not installed. "
                 "Install with: uv sync --extra training"
             )
-            self._constrained_processor_cache = False
+            self._outlines_generator = False
             return None
         except Exception as exc:
             logger.error(
-                "Outlines logits processor creation failed: %s. "
+                "Outlines Generator creation failed: %s. "
                 "Falling back to unconstrained generation. "
                 "Try: uv pip install -U outlines",
                 exc,
             )
-            self._constrained_processor_cache = False
+            self._outlines_generator = False
             return None
 
     # --- Task loading -----------------------------------------------------
@@ -221,23 +208,47 @@ def _collect_rollout(self, task_id: str, instruction: str) -> Rollout:
             else:
                 text_input = messages[-1]["content"]
 
-            inputs = self._processor(text=[text_input], images=[image], return_tensors="pt")
-            inputs = {k: v.to(self._model.device) for k, v in inputs.items()}
-            with torch.no_grad():
-                generate_kwargs: dict[str, Any] = dict(
+            # --- Generation: constrained (Outlines) or unconstrained (HF) ---
+            outlines_gen = (
+                self._get_outlines_generator()
+                if self._config.constrained_decoding
+                else None
+            )
+            if outlines_gen is not None:
+                # Outlines v1.2 Generator API: handles tokenization,
+                # generation, and decoding internally.  For multimodal
+                # models, pass a dict with "text" + image keys.
+                model_input = {"text": text_input, "images": [image]}
+                decoded = outlines_gen(
+                    model_input,
                     max_new_tokens=self._config.max_new_tokens,
                     temperature=self._config.temperature,
+                    # Keep sampling on: without do_sample=True HF generate
+                    # decodes greedily and ignores temperature.
                     do_sample=True,
                 )
-                # Constrained decoding: force output to match the
-                # action format regex, eliminating unparseable output.
-                if self._config.constrained_decoding:
-                    logits_proc = self._get_constrained_logits_processor()
-                    if logits_proc is not None:
-                        generate_kwargs["logits_processor"] = logits_proc
-                outputs = self._model.generate(**inputs, **generate_kwargs)
-            decoded = self._processor.decode(
-                outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
-            gen_len = outputs[0].shape[0] - inputs["input_ids"].shape[1]
+                # The Generator returns decoded text (no token ids), so
+                # re-tokenize it to approximate the generated length.
+                gen_len = len(self._processor.tokenizer.encode(
+                    decoded, add_special_tokens=False,
+                )) if decoded else 0
+            else:
+                # Standard HF generate (no constrained decoding)
+                inputs = self._processor(
+                    text=[text_input], images=[image], return_tensors="pt",
+                )
+                inputs = {k: v.to(self._model.device) for k, v in inputs.items()}
+                with torch.no_grad():
+                    outputs = self._model.generate(
+                        **inputs,
+                        max_new_tokens=self._config.max_new_tokens,
+                        temperature=self._config.temperature,
+                        do_sample=True,
+                    )
+                decoded = self._processor.decode(
+                    outputs[0][inputs["input_ids"].shape[1]:],
+                    skip_special_tokens=True,
+                )
+                gen_len = outputs[0].shape[0] - inputs["input_ids"].shape[1]
             action = parse_vlm_output_to_action(decoded, screen_size=self._config.screen_size)
 
diff --git a/tests/test_standalone_trainer.py b/tests/test_standalone_trainer.py
@@ -106,98 +106,112 @@ def test_no_bounded_quantifiers_in_regex(self) -> None:
 class TestConstrainedDecodingCache:
-    """Test the caching logic for the Outlines logits processor."""
+    """Test the caching logic and API contract for the Outlines Generator."""
 
-    def test_cache_starts_as_none(self) -> None:
+    def test_generator_cache_starts_as_none(self) -> None:
         config = TrainingConfig()
         trainer = GRPOTrainer(config)
-        assert trainer._constrained_processor_cache is None
+        assert trainer._outlines_generator is None
 
-    def test_failed_cache_returns_none(self) -> None:
-        """When compilation fails, subsequent calls return None (not [])."""
+    def test_failed_generator_returns_none(self) -> None:
+        """When creation fails, subsequent calls return None."""
         config = TrainingConfig(constrained_decoding=True)
         trainer = GRPOTrainer(config)
-        # Simulate a failed compilation
-        trainer._constrained_processor_cache = False
-        result = trainer._get_constrained_logits_processor()
+        trainer._outlines_generator = False
+        result = trainer._get_outlines_generator()
         assert result is None
 
-    def test_successful_cache_returns_list(self) -> None:
-        """When compilation succeeds, subsequent calls return the list."""
+    def test_successful_generator_returns_cached(self) -> None:
+        """When creation succeeds, subsequent calls return the cached generator."""
         config = TrainingConfig(constrained_decoding=True)
         trainer = GRPOTrainer(config)
-        # Simulate a successful compilation
-        trainer._constrained_processor_cache = ["mock_processor"]
-        result = trainer._get_constrained_logits_processor()
-        assert result == ["mock_processor"]
+        trainer._outlines_generator = "mock_generator"
+        result = trainer._get_outlines_generator()
+        assert result == "mock_generator"
 
     def test_outlines_api_imports(self) -> None:
         """Verify the outlines API the trainer depends on is importable.
 
         The trainer uses:
-        - outlines.Transformers (model wrapper)
-        - outlines.generator.get_regex_logits_processor (factory)
+        - outlines.from_transformers (model wrapper factory)
+        - outlines.regex (constraint factory)
+        - outlines.Generator (generation with constraints)
         """
         try:
             import outlines  # noqa: F401
         except ImportError:
             pytest.skip("outlines not installed")
 
-        from outlines import Transformers
-        from outlines.generator import get_regex_logits_processor
-        assert callable(Transformers)
-        assert callable(get_regex_logits_processor)
+        assert callable(outlines.from_transformers)
+        assert callable(outlines.regex)
+        assert callable(outlines.Generator)
 
-    def test_outlines_processor_creation(self) -> None:
-        """Verify a regex logits processor can actually be created.
+    def test_outlines_regex_compiles(self) -> None:
+        """Verify the action regex can be compiled by Outlines.
 
-        This is the integration test that would have caught the prior bugs:
-        - Wrong class name (RegexLogitsProcessor vs OutlinesLogitsProcessor)
-        - Wrong constructor args (tokenizer= kwarg didn't exist)
-
-        Requires a real model, so we use a tiny one or skip.
+        This catches DFA state explosion (bounded quantifiers) and
+        syntax errors in the regex.
         """
         try:
             import outlines
-            import torch
-            from transformers import AutoTokenizer
         except ImportError:
-            pytest.skip("outlines/torch/transformers not installed")
+            pytest.skip("outlines not installed")
 
-        try:
-            # Use the smallest possible tokenizer for fast test
-            tokenizer = AutoTokenizer.from_pretrained(
-                "hf-internal-testing/tiny-random-LlamaForCausalLM",
-                trust_remote_code=True,
-            )
-        except Exception:
-            pytest.skip("Could not load test tokenizer")
+        # This should NOT raise — if it does, the regex is too complex
+        constraint = outlines.regex(GRPOTrainer._ACTION_REGEX)
+        assert constraint is not None
 
-        from outlines.generator import get_regex_logits_processor
+    def test_outlines_generator_api_contract(self) -> None:
+        """Verify the Outlines Generator API contract the trainer depends on.
 
-        # Verify the factory function signature matches what the trainer expects:
-        # get_regex_logits_processor(backend_name, model, regex)
-        import inspect
-        sig = inspect.signature(get_regex_logits_processor)
+        Checks that:
+        1. outlines.from_transformers accepts (model, processor) args
+        2. outlines.regex returns an object Generator accepts
+        3. outlines.Generator returns a callable
+        4. The callable accepts (prompt, max_new_tokens=N) kwargs
+
+        Does NOT load a real model (too slow for CI). Instead verifies
+        the API signatures match what the trainer calls.
+        """
+        try:
+            import outlines
+            import inspect
+        except ImportError:
+            pytest.skip("outlines not installed")
+
+        # 1. from_transformers signature
+        sig = inspect.signature(outlines.from_transformers)
         params = list(sig.parameters.keys())
-        assert len(params) >= 3, (
-            f"get_regex_logits_processor signature changed: {sig}. "
-            f"Expected (backend_name, model, regex), got {params}"
+        assert "model" in params, f"from_transformers missing 'model' param: {params}"
+        assert "tokenizer_or_processor" in params or len(params) >= 2, (
+            f"from_transformers signature changed: {sig}"
         )
 
-    def test_empty_list_no_longer_caches_as_success(self) -> None:
-        """Regression test: empty list [] should NOT be treated as success.
-
-        Prior bug: failure cached [] which is truthy for `is not None`,
-        causing subsequent calls to return [] (no processors applied).
-        """
+        # 2. regex returns something
+        constraint = outlines.regex(r"DONE\(\)")
+        assert constraint is not None
+
+        # 3. Generator signature
+        sig_gen = inspect.signature(outlines.Generator)
+        params_gen = list(sig_gen.parameters.keys())
+        assert "model" in params_gen, f"Generator missing 'model' param: {params_gen}"
+
+        # 4. SteerableGenerator.__call__ accepts **inference_kwargs
+        from outlines.generator import SteerableGenerator
+        sig_call = inspect.signature(SteerableGenerator.__call__)
+        params_call = list(sig_call.parameters.keys())
+        assert "inference_kwargs" in params_call or any(
+            p.startswith("**") or sig_call.parameters[p].kind == inspect.Parameter.VAR_KEYWORD
+            for p in params_call
+        ), f"SteerableGenerator.__call__ doesn't accept **kwargs: {sig_call}"
+
+    def test_false_sentinel_not_confused_with_none(self) -> None:
+        """Regression: False sentinel must return None, not be treated as uninitialized."""
         config = TrainingConfig(constrained_decoding=True)
         trainer = GRPOTrainer(config)
-        # The old buggy behavior would cache [] on failure
-        # Verify the sentinel is False (not []) for failures
-        trainer._constrained_processor_cache = False
-        assert trainer._get_constrained_logits_processor() is None
-        # And [] is actually a valid success cache (with a processor in it)
-        trainer._constrained_processor_cache = ["real_processor"]
-        assert trainer._get_constrained_logits_processor() == ["real_processor"]
+        trainer._outlines_generator = False
+        assert trainer._get_outlines_generator() is None
+        # Any other cached value (a string stand-in here) is returned as-is
+        trainer._outlines_generator = "real_generator"
+        assert trainer._get_outlines_generator() == "real_generator"
 
 
 # ---------------------------------------------------------------------------