Fix HFInferenceParams missing is_compileable for transformers 5.x compatibility

svc-bionemo · svc-bionemo · commit 655b76267ca5 · 2026-04-17T10:34:18.000-07:00
transformers 5.5.4 (introduced via the PyTorch 26.03 base container) added a
check of the is_compileable property on cache objects in generate(). Add an
is_compileable property that returns False to HFInferenceParams in all model
files (llama3, qwen2, qwen3, mixtral) and in their recipe copies.

Signed-off-by: svc-bionemo <svc-bionemo@nvidia.com>
Signed-off-by: svc-bionemo <267129667+svc-bionemo@users.noreply.github.com>
diff --git a/bionemo-recipes/models/llama3/modeling_llama_te.py b/bionemo-recipes/models/llama3/modeling_llama_te.py
@@ -591,3 +591,8 @@ def reorder_cache(self, beam_idx: torch.LongTensor):
             updated_key_cache = key_cache.index_select(0, beam_idx)
             updated_value_cache = value_cache.index_select(0, beam_idx)
             self.cache_manager.cache[layer_number] = (updated_key_cache, updated_value_cache)
+
+    @property
+    def is_compileable(self) -> bool:
+        """Return False as this cache is not compatible with torch.compile."""
+        return False
diff --git a/bionemo-recipes/models/mixtral/modeling_mixtral_te.py b/bionemo-recipes/models/mixtral/modeling_mixtral_te.py
@@ -876,6 +876,11 @@ def reorder_cache(self, beam_idx: torch.LongTensor):
             updated_value_cache = value_cache.index_select(0, beam_idx)
             self.cache_manager.cache[layer_number] = (updated_key_cache, updated_value_cache)
 
+    @property
+    def is_compileable(self) -> bool:
+        """Return False as this cache is not compatible with torch.compile."""
+        return False
+
 
 @torch.compile(fullgraph=True)
 def _build_expert_sort_indices(recv_counts: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
diff --git a/bionemo-recipes/models/qwen/modeling_qwen2_te.py b/bionemo-recipes/models/qwen/modeling_qwen2_te.py
@@ -576,3 +576,8 @@ def reorder_cache(self, beam_idx: torch.LongTensor):
             updated_key_cache = key_cache.index_select(0, beam_idx)
             updated_value_cache = value_cache.index_select(0, beam_idx)
             self.cache_manager.cache[layer_number] = (updated_key_cache, updated_value_cache)
+
+    @property
+    def is_compileable(self) -> bool:
+        """Return False as this cache is not compatible with torch.compile."""
+        return False
diff --git a/bionemo-recipes/models/qwen/modeling_qwen3_te.py b/bionemo-recipes/models/qwen/modeling_qwen3_te.py
@@ -586,3 +586,8 @@ def reorder_cache(self, beam_idx: torch.LongTensor):
             updated_key_cache = key_cache.index_select(0, beam_idx)
             updated_value_cache = value_cache.index_select(0, beam_idx)
             self.cache_manager.cache[layer_number] = (updated_key_cache, updated_value_cache)
+
+    @property
+    def is_compileable(self) -> bool:
+        """Return False as this cache is not compatible with torch.compile."""
+        return False
diff --git a/bionemo-recipes/recipes/llama3_native_te/modeling_llama_te.py b/bionemo-recipes/recipes/llama3_native_te/modeling_llama_te.py
@@ -597,3 +597,8 @@ def reorder_cache(self, beam_idx: torch.LongTensor):
             updated_key_cache = key_cache.index_select(0, beam_idx)
             updated_value_cache = value_cache.index_select(0, beam_idx)
             self.cache_manager.cache[layer_number] = (updated_key_cache, updated_value_cache)
+
+    @property
+    def is_compileable(self) -> bool:
+        """Return False as this cache is not compatible with torch.compile."""
+        return False
diff --git a/bionemo-recipes/recipes/opengenome2_llama_native_te/modeling_llama_te.py b/bionemo-recipes/recipes/opengenome2_llama_native_te/modeling_llama_te.py
@@ -597,3 +597,8 @@ def reorder_cache(self, beam_idx: torch.LongTensor):
             updated_key_cache = key_cache.index_select(0, beam_idx)
             updated_value_cache = value_cache.index_select(0, beam_idx)
             self.cache_manager.cache[layer_number] = (updated_key_cache, updated_value_cache)
+
+    @property
+    def is_compileable(self) -> bool:
+        """Return False as this cache is not compatible with torch.compile."""
+        return False