add gpt to predict.py and infer.py

yzhang123 · yzhang123 · commit 8d22da636cd1 · 2025-09-03T09:13:12.000-07:00
Signed-off-by: Yang Zhang <yangzhang@nvidia.com>
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/models/gpt.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/models/gpt.py
@@ -91,15 +91,13 @@ def get_inference_wrapper(
         self, params_dtype, inference_batch_times_seqlen_threshold, inference_max_seq_length=8192
     ) -> GPTInferenceWrapper:
         """Gets the inference wrapper for the Mamba model."""
-        # Find MCoreMambaModel instance
-        mcore_model = self.module
-        while mcore_model:
-            if isinstance(mcore_model, ()):
+        model = self
+        while model is not None:
+            if getattr(model, "module", None) is not None:
+                model = model.module
+            else:
                 break
-            mcore_model = getattr(mcore_model, "module", None)
-        if mcore_model is None or not isinstance(
-            mcore_model, (megatron.core.models.gpt.gpt_model.GPTModel, Evo2StyleMCoreGPTModel)
-        ):
+        if not isinstance(model, megatron.core.models.gpt.gpt_model.GPTModel):
             raise ValueError("GPT model instance not found in the model structure.")
 
         vocab_size = None
@@ -111,14 +109,14 @@ def get_inference_wrapper(
             raise ValueError("Unable to find vocab size.")
 
         inference_wrapper_config = InferenceWrapperConfig(
-            hidden_size=mcore_model.config.hidden_size,
+            hidden_size=model.config.hidden_size,
             params_dtype=params_dtype,
             inference_batch_times_seqlen_threshold=inference_batch_times_seqlen_threshold,
             padded_vocab_size=vocab_size,
             inference_max_seq_length=inference_max_seq_length,
         )
 
-        model_inference_wrapper = GPTInferenceWrapper(mcore_model, inference_wrapper_config)
+        model_inference_wrapper = GPTInferenceWrapper(model, inference_wrapper_config)
         return model_inference_wrapper
 
     @override
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/infer.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/infer.py
@@ -86,14 +86,12 @@ def parse_args():
     )
     ap.add_argument(
         "--fp8",
-        type=bool,
         action="store_true",
         default=False,
         help="Whether to use vortex style FP8. Defaults to False.",
     )
     ap.add_argument(
         "--flash-decode",
-        type=bool,
         action="store_true",
         default=False,
         help="Whether to use flash decode. Defaults to True.",
@@ -173,8 +171,8 @@ def infer(
         path=ckpt_dir,
         trainer=trainer,
         params_dtype=torch.bfloat16,
-        inference_batch_times_seqlen_threshold=8192,  # TODO
-        inference_max_seq_length=8192,  # TODO
+        inference_batch_times_seqlen_threshold=len(prompt) + max_new_tokens,  # TODO
+        inference_max_seq_length=len(prompt) + max_new_tokens,  # TODO
         recompute_granularity=None,
         recompute_num_layers=None,
         recompute_method=None,
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/predict.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/predict.py
@@ -37,6 +37,7 @@
 from torch import Tensor
 
 from bionemo.evo2.data.fasta_dataset import SimpleFastaDataset
+from bionemo.evo2.models.gpt import GPT_MODEL_OPTIONS
 
 # Add import for Mamba models
 from bionemo.evo2.models.mamba import MAMBA_MODEL_OPTIONS, MambaModel
@@ -73,15 +74,17 @@ def parse_args():
     ap.add_argument(
         "--model-type",
         type=str,
-        choices=["hyena", "mamba"],
+        choices=["hyena", "mamba", "gpt"],
         default="hyena",
-        help="Model architecture family to use. Choose between 'hyena' and 'mamba'.",
+        help="Model architecture family to use. Choose between 'hyena', 'mamba', and 'gpt'.",
     )
     ap.add_argument(
         "--model-size",
         type=str,
         default="7b",
-        choices=sorted(list(HYENA_MODEL_OPTIONS.keys()) + list(MAMBA_MODEL_OPTIONS.keys())),
+        choices=sorted(
+            list(HYENA_MODEL_OPTIONS.keys()) + list(MAMBA_MODEL_OPTIONS.keys()) + list(GPT_MODEL_OPTIONS.keys())
+        ),
         help="Model size to use. Defaults to '7b'.",
     )
     # output args:
@@ -416,7 +419,7 @@ def predict(
             vortex_style_fp8=fp8 and not full_fp8,
             **config_modifiers_init,
         )
-    else:  # mamba
+    elif model_type == "mamba":  # mamba
         if model_size not in MAMBA_MODEL_OPTIONS:
             raise ValueError(f"Invalid model size for Mamba: {model_size}")
         config = MAMBA_MODEL_OPTIONS[model_size](
@@ -425,6 +428,15 @@ def predict(
             distribute_saved_activations=False if sequence_parallel and tensor_parallel_size > 1 else True,
             **config_modifiers_init,
         )
+    elif model_type == "gpt":
+        if model_size not in GPT_MODEL_OPTIONS:
+            raise ValueError(f"Invalid model size for GPT: {model_size}")
+        config = GPT_MODEL_OPTIONS[model_size](
+            forward_step_fn=hyena_predict_forward_step,
+            data_step_fn=hyena_predict_data_step,
+        )
+    else:
+        raise ValueError(f"Invalid model type: {model_type}")
 
     trainer.strategy._setup_optimizers = False
 
@@ -451,13 +463,20 @@ def predict(
             output_log_prob_seqs=output_log_prob_seqs,
             log_prob_collapse_option=log_prob_collapse_option,
         )
-    else:  # mamba
+    elif model_type == "mamba":  # mamba
         model = MambaPredictor(
             config,
             tokenizer=tokenizer,
             output_log_prob_seqs=output_log_prob_seqs,
             log_prob_collapse_option=log_prob_collapse_option,
         )
+    elif model_type == "gpt":
+        model = HyenaPredictor(
+            config,
+            tokenizer=tokenizer,
+            output_log_prob_seqs=output_log_prob_seqs,
+            log_prob_collapse_option=log_prob_collapse_option,
+        )
 
     resume.setup(trainer, model)  # this pulls weights from the starting checkpoint.