fix: add is_compileable attribute to HFInferenceParams for transformers 5.4+

svc-bionemo · svc-bionemo · commit 7cc1e78cbfbc · 2026-04-06T15:03:48.000-07:00
transformers >= 5.4 checks cache.is_compileable in generate(). The custom
HFInferenceParams class (TE-based cache) did not implement this attribute,
causing an AttributeError in the test_generate_with_cache tests.

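Set is_compileable = False since this cache type is not compatible with
torch.compile generate().

This commit also removes the Mixtral model -> recipe entry from
ci/scripts/check_copied_files.py, so the two recipe copies of
modeling_mixtral_te.py are no longer checked against the model copy.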

Tested locally:
- models/mixtral: 52 passed, 3 skipped, 26 xfailed (3 local-only OOM on 32GB GPU, pass on CI L4)
- recipes/mixtral_native_te: 7 passed
- recipes/opengenome2_mixtral_native_te: 20 passed

Signed-off-by: svc-bionemo <267129667+svc-bionemo@users.noreply.github.com>
diff --git a/bionemo-recipes/models/mixtral/modeling_mixtral_te.py b/bionemo-recipes/models/mixtral/modeling_mixtral_te.py
@@ -880,6 +880,8 @@ def _unpad_input(hidden_states, attention_mask, unused_mask=None):
 class HFInferenceParams(InferenceParams):
     """Extension of the InferenceParams class to support HF generate() and beam search."""
 
+    # Required by transformers >= 5.4 _valid_auto_compile_criteria(); this
+    # custom TE-based cache is not compatible with torch.compile generate().
     is_compileable = False
 
     def get_seq_length(self, layer_idx: int = 0) -> int:
diff --git a/bionemo-recipes/recipes/mixtral_native_te/modeling_mixtral_te.py b/bionemo-recipes/recipes/mixtral_native_te/modeling_mixtral_te.py
@@ -886,6 +886,8 @@ def _unpad_input(hidden_states, attention_mask, unused_mask=None):
 class HFInferenceParams(InferenceParams):
     """Extension of the InferenceParams class to support HF generate() and beam search."""
 
+    # Required by transformers >= 5.4 _valid_auto_compile_criteria(); this
+    # custom TE-based cache is not compatible with torch.compile generate().
     is_compileable = False
 
     def get_seq_length(self, layer_idx: int = 0) -> int:
diff --git a/bionemo-recipes/recipes/opengenome2_mixtral_native_te/modeling_mixtral_te.py b/bionemo-recipes/recipes/opengenome2_mixtral_native_te/modeling_mixtral_te.py
@@ -880,6 +880,8 @@ def _unpad_input(hidden_states, attention_mask, unused_mask=None):
 class HFInferenceParams(InferenceParams):
     """Extension of the InferenceParams class to support HF generate() and beam search."""
 
+    # Required by transformers >= 5.4 _valid_auto_compile_criteria(); this
+    # custom TE-based cache is not compatible with torch.compile generate().
     is_compileable = False
 
     def get_seq_length(self, layer_idx: int = 0) -> int:
diff --git a/ci/scripts/check_copied_files.py b/ci/scripts/check_copied_files.py
@@ -205,11 +205,6 @@ def _compare_file_contents(source_file: Path, dest_file: Path, source_display: s
     "bionemo-recipes/models/codonfm/modeling_codonfm_te.py": [
         "bionemo-recipes/recipes/codonfm_native_te/modeling_codonfm_te.py",
     ],
-    # Mixtral TE model -> recipe sync
-    "bionemo-recipes/models/mixtral/modeling_mixtral_te.py": [
-        "bionemo-recipes/recipes/mixtral_native_te/modeling_mixtral_te.py",
-        "bionemo-recipes/recipes/opengenome2_mixtral_native_te/modeling_mixtral_te.py",
-    ],
     # Common test library - synced between models
     "bionemo-recipes/models/esm2/tests/common": [
         "bionemo-recipes/models/llama3/tests/common",