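Make sampler dispatch overridable in ChatSampler.

Move the Gemma4 / non-Gemma4 sampler dispatch out of `ChatSampler.chat()` into
a new `ChatSampler._sample()` method, which `chat()` now calls, and add tests
that cover both dispatch paths.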

The gemma Authors · The gemma Authors · commit 44a625e4d787 · 2026-04-27T14:22:59.000-07:00
PiperOrigin-RevId: 906439952
diff --git a/gemma/gm/text/_chat_sampler.py b/gemma/gm/text/_chat_sampler.py
@@ -218,6 +218,46 @@ def gemma4_sampler(self) -> _gemma4_sampler.Gemma4Sampler:
         'Use `sampler` instead.'
     )
 
+  def _sample(
+      self,
+      prompt_text: str,
+      *,
+      images,
+      audio,
+      audio_lengths,
+      sampling,
+      max_new_tokens,
+      rng,
+      last_state,
+      stream,
+      sharding,
+  ):
+    """Dispatches to the correct underlying sampler."""
+    if self._is_gemma4:
+      return self.gemma4_sampler.sample(
+          prompt_text,
+          images=images,
+          audio=audio,
+          audio_lengths=audio_lengths,
+          sampling=sampling,
+          max_new_tokens=max_new_tokens,
+          rng=rng,
+          return_state=True,
+          last_state=last_state,
+          sharding=sharding,
+      )
+    else:
+      return self.sampler.sample(  # pytype: disable=wrong-arg-types
+          prompt_text,
+          images=images,
+          sampling=sampling,
+          max_new_tokens=max_new_tokens,
+          rng=rng,
+          return_state=True,
+          last_state=last_state,
+          stream=bool(stream),
+      )
+
   def chat(
       self,
       prompt: str | dialog.Conversation,
@@ -338,30 +378,18 @@ def chat(
     )
 
     # --- Dispatch to the correct sampler ---
-    if self._is_gemma4:
-      out = self.gemma4_sampler.sample(
-          prompt_text,
-          images=images,
-          audio=audio,
-          audio_lengths=audio_lengths,
-          sampling=sampling,
-          max_new_tokens=max_new_tokens,
-          rng=rng,
-          return_state=True,
-          last_state=last_state,
-          sharding=sharding,
-      )
-    else:
-      out = self.sampler.sample(  # pytype: disable=wrong-arg-types
-          prompt_text,
-          images=images,
-          sampling=sampling,
-          max_new_tokens=max_new_tokens,
-          rng=rng,
-          return_state=True,
-          last_state=last_state,
-          stream=bool(stream),
-      )
+    out = self._sample(
+        prompt_text,
+        images=images,
+        audio=audio,
+        audio_lengths=audio_lengths,
+        sampling=sampling,
+        max_new_tokens=max_new_tokens,
+        rng=rng,
+        last_state=last_state,
+        stream=stream,
+        sharding=sharding,
+    )
 
     # In streaming mode, the output is an iterator, yielding tokens one at a
     # time.
diff --git a/gemma/gm/text/_sampler_test.py b/gemma/gm/text/_sampler_test.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from unittest import mock
 from gemma import gm
+from gemma.gm.text import _sampler
 from gemma.gm.text import _sampler_loop
 import jax
 import jax.numpy as jnp
@@ -54,3 +56,92 @@ def test_sampler():
   )
   sampler.sample('Hello world')
 
+
+def test_chat_sampler_gemma4_dispatch():
+  """Tests that _sample() dispatches to gemma4_sampler when _is_gemma4 is True.
+
+  Uses mocks to verify the dispatch logic without requiring a full Gemma4
+  model. This catches regressions in the _sample() method that could break
+  the Gemma4 path.
+  """
+  model = gm.testing.DummyGemma()
+  params = model.init(
+      jax.random.PRNGKey(0),
+      jnp.zeros((5,), dtype=jnp.int32),
+  )
+  params = params['params']
+  tokenizer = gm.testing.DummyTokenizer()
+  chat_sampler = gm.text.ChatSampler(
+      model=model,
+      params=params,
+      tokenizer=tokenizer,
+      cache_length=128,
+      max_out_length=128,
+  )
+
+  # Force the Gemma4 dispatch path.
+  mock_sample = mock.MagicMock(
+      return_value=_sampler.SamplerOutput(
+          text='mock output',
+          state=mock.MagicMock(),
+      )
+  )
+  with mock.patch.object(
+      type(chat_sampler),
+      '_is_gemma4',
+      new_callable=lambda: property(lambda self: True),
+  ):
+    with mock.patch.object(
+        type(chat_sampler),
+        'gemma4_sampler',
+        new_callable=lambda: property(
+            lambda self: mock.MagicMock(sample=mock_sample)
+        ),
+    ):
+      output = chat_sampler.chat('Hello world')
+      assert isinstance(output, str)
+  # Verify gemma4_sampler.sample was called (not sampler.sample).
+  mock_sample.assert_called_once()
+
+
+def test_chat_sampler_non_gemma4_dispatch():
+  """Tests that _sample() dispatches to sampler when _is_gemma4 is False.
+
+  Uses mocks to verify the dispatch logic without exercising the full sampling
+  pipeline (which is already covered by test_sampler). This catches regressions
+  in _sample() that could break the non-Gemma4 dispatch path.
+  """
+  model = gm.testing.DummyGemma()
+  params = model.init(
+      jax.random.PRNGKey(0),
+      jnp.zeros((5,), dtype=jnp.int32),
+  )
+  params = params['params']
+  tokenizer = gm.testing.DummyTokenizer()
+  chat_sampler = gm.text.ChatSampler(
+      model=model,
+      params=params,
+      tokenizer=tokenizer,
+      cache_length=128,
+      max_out_length=128,
+  )
+
+  assert not chat_sampler._is_gemma4  # Confirm non-Gemma4 dispatch path.
+
+  mock_sample = mock.MagicMock(
+      return_value=_sampler.SamplerOutput(
+          text='mock output',
+          state=mock.MagicMock(),
+      )
+  )
+  with mock.patch.object(
+      type(chat_sampler),
+      'sampler',
+      new_callable=lambda: property(
+          lambda self: mock.MagicMock(sample=mock_sample)
+      ),
+  ):
+    output = chat_sampler.chat('Hello world')
+    assert isinstance(output, str)
+  # Verify sampler.sample was called (not gemma4_sampler.sample).
+  mock_sample.assert_called_once()