Distillative-AI
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
Lines changed: 9 additions & 2 deletions
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
Lines changed: 1 addition & 0 deletions
diff --git a/.devcontainer/initializeCommand.sh b/.devcontainer/initializeCommand.sh
Lines changed: 1 addition & 0 deletions
diff --git a/.devcontainer/postCreateCommand.sh b/.devcontainer/postCreateCommand.sh
Lines changed: 5 additions & 0 deletions
diff --git a/.devcontainer/start.sh b/.devcontainer/start.sh
Lines changed: 1 addition & 1 deletion
diff --git a/bionemo-recipes/recipes/evo2_megatron/.ci_build.sh b/bionemo-recipes/recipes/evo2_megatron/.ci_build.sh
Lines changed: 3 additions & 3 deletions
diff --git a/bionemo-recipes/recipes/evo2_megatron/build_requirements.txt b/bionemo-recipes/recipes/evo2_megatron/build_requirements.txt
Lines changed: 2 additions & 0 deletions
diff --git a/bionemo-recipes/recipes/evo2_megatron/pyproject.toml b/bionemo-recipes/recipes/evo2_megatron/pyproject.toml
Lines changed: 17 additions & 7 deletions
diff --git a/bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/evo2_provider.py b/bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/evo2_provider.py
Lines changed: 31 additions & 1 deletion
diff --git a/bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/megatron/hyena/engine.py b/bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/megatron/hyena/engine.py
Lines changed: 17 additions & 7 deletions
@@ -10,9 +10,16 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=requirements.txt,target=/workspace/requirements.txt \
     PIP_CONSTRAINT= pip install -r /workspace/requirements.txt
 
+# Sandboxed agent CLIs use these helpers on Linux.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    bubblewrap \
+    uidmap \
+    && rm -rf /var/lib/apt/lists/*
+
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/
+
 USER ubuntu
-RUN curl https://cursor.com/install -fsS | bash  # Install cursor-agent CLI tool
-RUN curl -fsSL https://claude.ai/install.sh | bash  # Install Claude CLI tool
+RUN curl https://cursor.com/install -fsS | bash || true  # Install cursor-agent CLI tool
+RUN curl -fsSL https://claude.ai/install.sh | bash || true # Install Claude CLI tool
 RUN uv tool install pre-commit --with pre-commit-uv --force-reinstall
 ENV PATH="/home/ubuntu/.local/bin:${PATH}"
@@ -8,6 +8,7 @@
         "source=${localEnv:HOME}/.cache,target=/home/ubuntu/.cache,type=bind,consistency=cached",
         "source=${localEnv:HOME}/.claude,target=/home/ubuntu/.claude,type=bind,consistency=cached",
         "source=${localEnv:HOME}/.claude.json,target=/home/ubuntu/.claude.json,type=bind,consistency=cached",
+        "source=${localEnv:HOME}/.codex,target=/home/ubuntu/.codex,type=bind,consistency=cached",
         "source=${localEnv:HOME}/.config,target=/home/ubuntu/.config,type=bind,consistency=cached",
         "source=${localEnv:HOME}/.cursor,target=/home/ubuntu/.cursor,type=bind,consistency=cached",
         "source=${localEnv:HOME}/.gnupg,target=/home/ubuntu/.gnupg,type=bind,consistency=cached",
 
@@ -8,6 +8,7 @@ mkdir -p ~/.gnupg
 mkdir -p ~/.config
 mkdir -p ~/.cursor
 mkdir -p ~/.claude
+mkdir -p ~/.codex
 [ ! -f ~/.netrc ] && touch ~/.netrc
 
 [ ! -f ~/.bash_history_devcontainer ] && touch ~/.bash_history_devcontainer
 
@@ -1,5 +1,10 @@
 #!/usr/bin/env bash
 set -euo pipefail
+
+# Install Codex as the devcontainer user so the binary lands in the mounted user environment.
+if ! command -v codex >/dev/null 2>&1; then  # only install when no codex executable is found on PATH
+  curl -fsSL https://chatgpt.com/codex/install.sh | sh || true  # best-effort: do not abort this script if the Codex installer cannot be downloaded or fails
+fi
 # Run via uv to avoid relying on updated PATH in this shell
 if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
   # Some editors (VS Code, Cursor) set core.hooksPath in .git/config, which
 
@@ -9,7 +9,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
 DEVCONTAINER_JSON="${SCRIPT_DIR}/devcontainer.json"
 CONTAINER_NAME="${BIONEMO_CONTAINER_NAME:-bionemo-devcontainer}"
-IMAGE_NAME="${BIONEMO_IMAGE_NAME:-bionemo-devcontainer:latest}"
+IMAGE_NAME="${BIONEMO_IMAGE_NAME:-${CONTAINER_NAME}:latest}"
 
 # ---------------------------------------------------------------------------
 # Helpers
 
@@ -5,7 +5,7 @@
 rm -f /usr/local/lib/python*/dist-packages/transformer_engine-*.dist-info/direct_url.json
 export UV_LOCK_TIMEOUT=900  # increase to 15 minutes (900 seconds), adjust as needed
 export UV_LINK_MODE=copy
-uv venv --system-site-packages
+uv venv --clear --system-site-packages
 
 # 2. Activate the environment
 source .venv/bin/activate
@@ -38,8 +38,8 @@ for pkg_dir in "$RECIPE_ROOT/../../../sub-packages/bionemo-recipeutils" "$RECIPE
     fi
 done
 
-# 6. Install the recipe with all remaining dependencies
-uv pip install -c pip-constraints.txt -e . --no-build-isolation
+# 6. Install the recipe with all remaining dependencies, including test extras
+uv pip install -c pip-constraints.txt -e '.[test]' --no-build-isolation
 
 # 7. Restore original pyproject.toml (the edit was only needed for uv resolution)
 mv pyproject.toml.ci_bak pyproject.toml
@@ -1,3 +1,5 @@
 poetry-core
+poetry_dynamic_versioning  # build dep of nvidia-resiliency-ext (transitively pulled by megatron-bridge); needed in the venv because we install with --no-build-isolation
+grpcio-tools  # build dep of nvidia-resiliency-ext: its setup.py shells out to `python -m grpc_tools.protoc` to compile *.proto files; --no-build-isolation means we have to provide it in the venv up-front
 wheel_stub
 ninja  # should speed up causal-conv1d build
@@ -24,9 +24,10 @@ dependencies = [
     "causal_conv1d",
     "nv-grouped-gemm",
     "megatron-core",
-    "nvidia-resiliency-ext",
+    # nvidia-resiliency-ext is pulled transitively by megatron-bridge.
     "emerging_optimizers",
     "subquadratic-ops-torch-cu13",
+    "email-validator",
 
     # These are dependencies for examples only, but are useful for actually doing analyses with this model
     "biopython",
@@ -35,7 +36,9 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-test = []
+test = [
+    "pytest>=8.0",
+]
 
 [project.scripts]
 torchrun = "torch.distributed.run:main"
@@ -88,22 +91,29 @@ override-dependencies = [
     "triton; sys_platform == 'never'",
     "transformer-engine; sys_platform == 'never'",
     "transformer-engine[pytorch]; sys_platform == 'never'",
+    # Avoid alpha Pydantic releases; langchain imports pulled by nvidia-resiliency-ext are not compatible.
+    "pydantic>=2.12,<2.14",
+    # Avoid optional log-pattern-mining dependency conflicts from nvidia-resiliency-ext.
+    "logsage; sys_platform == 'never'",
+    "drain3; sys_platform == 'never'",
 ]
 
 [tool.uv.sources]
 # Shared recipe utilities (framework-agnostic)
 # External dependencies with specific git tags/commits
-causal_conv1d = { git = "https://github.com/Dao-AILab/causal-conv1d.git", tag = "v1.5.4" }
+# 1.6.1 fixes a custom-op no-storage failure in no-grad/frozen forward paths.
+causal_conv1d = { git = "https://github.com/Dao-AILab/causal-conv1d.git", tag = "v1.6.1" }
 nv-grouped-gemm = { git = "https://github.com/fanshiqing/grouped_gemm", tag = "v1.1.4.post6" }
 
 # Internal dependencies
 bionemo-recipeutils = { git = "https://github.com/NVIDIA/bionemo-framework.git", branch = "main", subdirectory = "sub-packages/bionemo-recipeutils" }
 bionemo-core = { git = "https://github.com/NVIDIA/bionemo-framework.git", branch = "main", subdirectory = "sub-packages/bionemo-core" }
-nvidia-resiliency-ext = { git = "https://github.com/NVIDIA/nvidia-resiliency-ext.git", rev = "54f85fe422d296cf04ea524130014bd3a2c3add1" }
+# nvidia-resiliency-ext is intentionally left to Megatron-Bridge so the transitive pin stays consistent.
 
-# Megatron Bundle. This points to a version that still supports the deprecated no_weight_decay_cond field until the API for an alternative has been finalized.
-megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "549e3cb970c170b1d7a86d021261efe05e8a5d9f" }
-megatron-core = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "549e3cb970c170b1d7a86d021261efe05e8a5d9f", subdirectory = "3rdparty/Megatron-LM" }
+# Megatron Bundle. MCore is sourced from the same Megatron-Bridge release tag.
+megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", tag = "v0.4.1" }
+megatron-core = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", tag = "v0.4.1", subdirectory = "3rdparty/Megatron-LM" }
 
 [tool.uv.extra-build-dependencies]
 warp-lang = ["wheel_stub"]
+nvidia-resiliency-ext = ["poetry_dynamic_versioning"]
@@ -18,8 +18,10 @@
 
 
 import math
+import sys
 from dataclasses import dataclass
 from functools import partial
+from pathlib import Path
 from typing import Callable, Iterable, Literal, Optional, Type
 
 import torch
@@ -35,6 +37,7 @@
 from megatron.bridge.training.state import GlobalState
 from megatron.bridge.training.utils.packed_seq_utils import get_packed_seq_params
 from megatron.bridge.training.utils.pg_utils import get_pg_collection
+from megatron.bridge.utils.instantiate_utils import register_allowed_target_prefix
 from megatron.bridge.utils.vocab_utils import calculate_padded_vocab_size
 from megatron.core import parallel_state
 from megatron.core.inference.contexts import StaticInferenceContext
@@ -53,6 +56,33 @@
 from bionemo.evo2.models.megatron.hyena.hyena_utils import hyena_no_weight_decay_cond
 
 
+def _patch_megatron_dataset_helper_compile() -> None:
+    """Skip Megatron's runtime helper build when a wheel already ships the extension."""
+    from megatron.core.datasets import utils as dataset_utils  # function-scope import of the module being patched
+
+    original_compile_helpers = dataset_utils.compile_helpers  # whatever is currently installed on the module
+    if getattr(original_compile_helpers, "_evo2_prebuilt_helper_guard", False):  # already our wrapper
+        guarded_compile_helpers = original_compile_helpers  # reuse it so repeated calls never double-wrap
+    else:
+
+        def guarded_compile_helpers() -> None:  # replacement installed as dataset_utils.compile_helpers
+            datasets_dir = Path(dataset_utils.__file__).resolve().parent  # directory of the dataset utils module
+            if not (datasets_dir / "Makefile").exists() and list(datasets_dir.glob("helpers_cpp*.so")):
+                return None  # no Makefile but a helpers_cpp*.so is present: skip the build entirely
+            return original_compile_helpers()  # otherwise defer to the original implementation
+
+        guarded_compile_helpers._evo2_prebuilt_helper_guard = True  # marker read by the getattr check above
+        dataset_utils.compile_helpers = guarded_compile_helpers  # install the wrapper on the module
+
+    bridge_initialize = sys.modules.get("megatron.bridge.training.initialize")  # None if not imported yet
+    if bridge_initialize is not None:
+        bridge_initialize.compile_helpers = guarded_compile_helpers  # overwrite that module's compile_helpers name as well (presumably imported by name there)
+
+
+_patch_megatron_dataset_helper_compile()
+register_allowed_target_prefix("bionemo.evo2.")
+
+
 def get_vocab_size(*args, **kwargs):
     raise NotImplementedError("FIXME get_vocab_size is not implemented Find it in megatron bridge")
 
@@ -306,7 +336,7 @@ class HyenaModelProvider(TransformerConfig, ModelProviderMixin[MCoreHyenaModel])
     apply_rope_fusion: bool = True
     make_vocab_size_divisible_by: int = 128
     gated_linear_unit: bool = True
-    fp32_residual_connection: bool = True
+    fp32_residual_connection: bool = False
     normalization: str = "RMSNorm"
     add_bias_linear: bool = False
     hidden_dropout: float = 0.0
 
@@ -19,13 +19,17 @@
 import torch.nn.functional as F  # noqa: N812
 from einops import rearrange
 
+from bionemo.evo2.models.megatron.hyena.fft_utils import linear_causal_fft_size
+
 
 try:
     from subquadratic_ops_torch.causal_conv1d import causal_conv1d as _subq_causal_conv1d
     from subquadratic_ops_torch.fft_causal_conv1d import fft_causal_conv1d as _subq_fft_causal_conv1d
+    from subquadratic_ops_torch.rearrange import rearrange as _subq_rearrange
 except ImportError as _subq_import_error:
     _subq_causal_conv1d = None
     _subq_fft_causal_conv1d = None
+    _subq_rearrange = None
     _subq_error_msg = f"subquadratic_ops_torch not available: {_subq_import_error}"
 
 
@@ -50,7 +54,7 @@ def fftconv_func(*, u, k, D):  # noqa: N803
     The convolution is computed in the frequency domain and then transformed back to the time domain.
     """
     seqlen = u.shape[-1]
-    fft_size = 2 * seqlen
+    fft_size = linear_causal_fft_size(seqlen, k.shape[-1])
 
     k_f = torch.fft.rfft(k, n=fft_size) / fft_size
     k_f = adjust_filter_shape_for_broadcast(u, k_f)
@@ -76,11 +80,15 @@ def parallel_fir(
 ):
     """Compute parallel finite impulse response filtering with optional state computation."""
     L = u.shape[1]  # noqa: N806
-    u = rearrange(u, "b l d -> b d l")
 
     if use_subquadratic_ops and _subq_fft_causal_conv1d is None:
         raise ImportError(_subq_error_msg)
 
+    if use_subquadratic_ops:
+        u = _subq_rearrange(u.transpose(0, 1), bhl_to_lbh=False)
+    else:
+        u = rearrange(u, "b l d -> b d l")
+
     if fir_length >= 128:
         if use_subquadratic_ops:
             # subq-ops fft_causal_conv1d expects [B, D, L] input and [D, L] filter; dtypes must match
@@ -99,7 +107,9 @@ def parallel_fir(
                 ).to(dtype=u.dtype)
     else:
         if use_subquadratic_ops:
-            # subq-ops causal_conv1d expects pre-padded [B, D, L+pad] input and [D, K] weight; dtypes must match
+            if _subq_causal_conv1d is None:
+                raise ImportError(_subq_error_msg)
+            # subq-ops causal_conv1d expects pre-padded [B, D, L+pad] input and [D, K] weight.
             pad_size = fir_length - 1
             x_padded = F.pad(u.to(torch.float32), (pad_size, 0))
             w = weight.squeeze(1) if weight.dim() == 3 else weight
@@ -111,7 +121,7 @@ def parallel_fir(
                 bias=None,
                 stride=1,
                 padding=fir_length - 1,
-                groups=u.shape[1],  # always set to D, regardless of filter grouping
+                groups=u.shape[1],
             )[..., :L]
 
         z = z.to(u.dtype)
@@ -130,7 +140,7 @@ def parallel_fir(
 
 def parallel_iir(*, z_pre, h, D, L, poles, t, hidden_size, compute_state):  # noqa: N803
     """Compute the output state of the short convolutional filter."""
-    fft_size = 2 * L
+    fft_size = linear_causal_fft_size(L, h.shape[-1])
     x1, x2, v = z_pre.split([hidden_size, hidden_size, hidden_size], dim=1)
 
     x1v = x1 * v
@@ -221,9 +231,9 @@ def prefill_via_modal_fft(*, x1v, L, poles, t, X_s):  # noqa: N803
     # When the model has a long convolution derived from a recurrence in modal form and prefill_style is "fft",
     # we split the filter into poles and residues and reuse FFT computation on the input.
     bs = x1v.shape[0]
-    fft_size = 2 * L
+    fft_size = X_s.shape[-1]
     state_s = (poles.to(torch.float32) * t).exp()
-    state_S = torch.fft.fft(state_s, n=fft_size).repeat(bs, 1, 1, 1)  # noqa N806: B, D, state_dim, 2 * L
+    state_S = torch.fft.fft(state_s, n=fft_size).repeat(bs, 1, 1, 1)  # noqa N806: B, D, state_dim, fft_size
     state = torch.fft.ifft(X_s[..., None, :] * state_S, n=fft_size)
     # Do not try to fix `UserWarning: Casting complex values to real discards
     # the imaginary part` by inserting state.real conversion anywhere before