Address PR feedback

jstjohn · jstjohn · commit 5411e0937fff · 2026-05-22T14:17:28.000-07:00
Signed-off-by: John St. John <jstjohn@nvidia.com>
diff --git a/bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/megatron/hyena/hyena_block.py b/bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/megatron/hyena/hyena_block.py
@@ -121,8 +121,9 @@ def __init__(
             pp_layer_offset, layer_type_list = self._select_layers_for_pipeline_parallel(layer_type_list)
 
         if get_cpu_offload_context is not None:
-            # Megatron Core changed this helper from six to seven positional arguments
-            # across releases. Pass only the arguments accepted by the installed version.
+            # MCore 0.x has shipped both six- and seven-argument variants of this helper.
+            # Pass only the arguments accepted by the installed version; if a future helper
+            # uses *args, pass the full compatibility list rather than counting *args as one slot.
             offload_args = [
                 self.config.cpu_offloading,
                 self.config.cpu_offloading_num_layers,
@@ -132,9 +133,16 @@ def __init__(
                 self.config.cpu_offloading_double_buffering,
                 getattr(self.config, "cpu_offloading_retain_pinned_cpu_buffers", False),
             ]
-            num_offload_params = len(inspect.signature(get_cpu_offload_context).parameters)
+            offload_params = tuple(inspect.signature(get_cpu_offload_context).parameters.values())
+            if any(param.kind is inspect.Parameter.VAR_POSITIONAL for param in offload_params):
+                num_offload_args = len(offload_args)
+            else:
+                num_offload_args = sum(
+                    param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
+                    for param in offload_params
+                )
             (self.offload_context, self.group_prefetch_offload_commit_async) = get_cpu_offload_context(
-                *offload_args[:num_offload_params],
+                *offload_args[:num_offload_args],
             )
             self.config._cpu_offloading_context = self.offload_context if self.config.cpu_offloading else None
         else:
diff --git a/bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/megatron/hyena/hyena_utils.py b/bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/megatron/hyena/hyena_utils.py
@@ -467,7 +467,7 @@ def fftconv_func(
 ):
     """Apply a 1D convolution to the input sequence u using the filter k and the shortcut D."""
     seqlen = u.shape[-1]
-    fft_size = 2 * seqlen
+    fft_size = max(2 * seqlen, 2 * k.shape[-1])
 
     # check if k is less than seqlen -- subquadratic_ops input does not need padding
     if not use_subquadratic_ops and k.shape[-1] < seqlen:
@@ -499,7 +499,6 @@ def fftconv_func(
         if use_subquadratic_ops:
             y = fft_causal_conv1d(u, k.squeeze(0))
         else:
-            fft_size = max(fft_size, 2 * k.shape[-1])
             k_f = torch.fft.rfft(k, n=fft_size) / fft_size
             if k_rev is not None:
                 k_rev_f = torch.fft.rfft(k_rev, n=fft_size) / fft_size
@@ -646,6 +645,8 @@ def compute_filter(self, L, t, glogp, R):  # noqa: N803
 
         return h, None
 
+    # Keep this eager. The short-prefill prefix-invariance tests in tests/bionemo/evo2/run
+    # cover the prior torch.compile regression with dynamic filter lengths and custom ops.
     def filter(self, L, *args, **kwargs):  # noqa: N803
         """Get t and the convolution filter for t and the requested sequence length."""
         if self._cp_size > 1:
@@ -768,8 +769,7 @@ def forward(self, L, *args, **kwargs):  # noqa: N803
         """
         return self.filter(L, *args, **kwargs)
 
-    # Keep this eager. Compiling this helper can leave global dispatcher state
-    # that interferes with unrelated custom autograd/custom-op call sites.
+    # Keep this eager: same short-prefill prefix-invariance torch.compile regression as ImplicitModalFilter.filter.
     def filter(self, L, *args, **kwargs):  # noqa: N803
         """Compute the filter as a function of h and decay for the requested sequence length."""
         h = self.h[:, :L]
diff --git a/bionemo-recipes/recipes/evo2_megatron/tests/bionemo/evo2/models/megatron/hyena/test_hyena_utils.py b/bionemo-recipes/recipes/evo2_megatron/tests/bionemo/evo2/models/megatron/hyena/test_hyena_utils.py
@@ -537,6 +537,27 @@ def test_fftconv_func():
     assert output_short.shape == u.shape
 
 
+def test_fftconv_func_bidirectional_is_prefix_invariant_when_filter_is_longer_than_input():
+    """Bidirectional FFT convolution should not alias short prefixes when the filter is long."""
+    torch.manual_seed(1234)  # fixed seed so the random inputs are reproducible
+    batch_size = 2
+    short_len = 5  # much shorter than filter_len
+    long_len = 64
+    hidden_size = 4
+    filter_len = 64  # longer than short_len, equal to long_len
+
+    u_short = torch.randn(batch_size, hidden_size, short_len)
+    u_long = torch.zeros(batch_size, hidden_size, long_len)
+    u_long[..., :short_len] = u_short  # u_long is u_short followed by zeros
+    k = torch.randn(1, 2 * hidden_size, filter_len)  # presumably forward + reverse kernels; confirm
+    D = torch.randn(hidden_size)  # noqa: N806
+
+    short_out = fftconv_func(u_short, k, D, None, gelu=False, bidirectional=True)
+    long_out = fftconv_func(u_long, k, D, None, gelu=False, bidirectional=True)[..., :short_len]
+
+    torch.testing.assert_close(short_out, long_out, rtol=1e-5, atol=1e-5)
+
+
 def test_fftconv_func_high_dimensional_input():
     """Test fftconv_func with high-dimensional input to cover the len(u.shape) > 3 case."""
     batch_size = 2