remove amplify kwargs

pstjohn · pstjohn · commit 0b1153fd2ceb · 2025-09-10T07:50:11.000-07:00
Signed-off-by: Peter St. John <pstjohn@nvidia.com>
diff --git a/models/amplify/.devcontainer/devcontainer.json b/models/amplify/.devcontainer/devcontainer.json
@@ -2,14 +2,11 @@
 // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile
 {
     "name": "Existing Dockerfile",
-    "build": {
-        "context": "..",
-        "dockerfile": "Dockerfile.dev"
-    },
+    "image": "svcbionemo023/bionemo-framework:amplify-model-devcontainer-082025",
     "mounts": [
         "source=${localEnv:HOME}/.cache,target=/home/ubuntu/.cache,type=bind,consistency=cached"
     ],
-    "postCreateCommand": "pip install -e .[convert,test]",
+    "postCreateCommand": "PIP_CONSTRAINT= pip install -e .",
     "remoteUser": "ubuntu",
     "runArgs": [
         "--gpus=all",
diff --git a/models/amplify/src/amplify/amplify_te.py b/models/amplify/src/amplify/amplify_te.py
@@ -215,7 +215,6 @@ def forward(
         output_hidden_states=False,
         output_attentions=False,
         labels=None,
-        **kwargs,
     ) -> BaseModelOutput:
         """Forward pass of the AMPLIFY model.
 
@@ -225,7 +224,6 @@ def forward(
             output_hidden_states (bool): Whether to output the hidden states.
             output_attentions (bool): Whether to output the attention weights.
             labels (torch.Tensor): The labels.
-            **kwargs: Additional arguments.
 
         Returns:
             BaseModelOutput: The output of the model.
@@ -299,7 +297,6 @@ def forward(
         output_hidden_states=False,
         output_attentions=False,
         labels=None,
-        **kwargs,
     ) -> MaskedLMOutput:
         """Forward pass of the AMPLIFYForMaskedLM model.
 
@@ -309,7 +306,6 @@ def forward(
             output_hidden_states (bool): Whether to output the hidden states.
             output_attentions (bool): Whether to output the attention weights.
             labels (torch.Tensor): The labels.
-            **kwargs: Additional arguments.
 
         Returns:
             MaskedLMOutput: The output of the model.
@@ -320,7 +316,6 @@ def forward(
             output_hidden_states,
             output_attentions,
             labels,
-            **kwargs,
         )
 
         # Classification head with layer norm
diff --git a/models/amplify/tests/conftest.py b/models/amplify/tests/conftest.py
@@ -68,6 +68,7 @@ def input_data(tokenizer):
         tokenizer=tokenizer,
         mlm_probability=0.15,
         pad_to_multiple_of=1024,
+        seed=42,
     )
 
     def tokenize_function(examples):
diff --git a/models/amplify/tests/test_amplify_model.py b/models/amplify/tests/test_amplify_model.py
@@ -168,3 +168,52 @@ def test_convert_state_dict():
     te_state_dict_keys.remove("decoder.bias")
 
     assert len(te_state_dict_keys) == 0
+
+
+def test_hf_trained_model_loss(input_data):
+    model = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M")
+    model.to("cuda", dtype=torch.bfloat16)
+    input_data = {k: v.to("cuda") for k, v in input_data.items()}
+    model.eval()
+    with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
+        output = model(**input_data)
+
+    torch.testing.assert_close(output.loss.detach().cpu(), torch.tensor(2.4), atol=1e-1, rtol=1e-2)
+
+
+def test_te_trained_model_loss(input_data):
+    model_hf = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M")
+    model = convert_amplify_hf_to_te(model_hf)
+    model.to("cuda", dtype=torch.bfloat16)
+    input_data = {k: v.to("cuda") for k, v in input_data.items()}
+    model.eval()
+    with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
+        output = model(**input_data)
+
+    torch.testing.assert_close(output.loss.detach().cpu(), torch.tensor(2.4), atol=1e-1, rtol=1e-2)
+
+
+def test_hf_reinitialized_model_loss(input_data):
+    config = amp_hf.AMPLIFYConfig.from_pretrained("chandar-lab/AMPLIFY_120M")
+    model = amp_hf.AMPLIFY(config)
+    model.to("cuda", dtype=torch.bfloat16)
+    input_data = {k: v.to("cuda") for k, v in input_data.items()}
+    model.eval()
+    with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
+        output = model(**input_data)
+
+    loss = output.loss.detach().cpu()
+    assert loss < 3.5, f"Loss is {loss}, expected less than 3.5"
+
+
+def test_te_reinitialized_model_loss(input_data):
+    config = amp_te.AMPLIFYConfig.from_pretrained("chandar-lab/AMPLIFY_120M")
+    model = amp_te.AMPLIFYForMaskedLM(config)
+    model.to("cuda", dtype=torch.bfloat16)
+    input_data = {k: v.to("cuda") for k, v in input_data.items()}
+    model.eval()
+    with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
+        output = model(**input_data)
+
+    loss = output.loss.detach().cpu()
+    assert loss < 3.5, f"Loss is {loss}, expected less than 3.5"

Original file line number	Diff line number	Diff line change
`@@ -68,6 +68,7 @@ def input_data(tokenizer):`
`68`	`68`	`tokenizer=tokenizer,`
`69`	`69`	`mlm_probability=0.15,`
`70`	`70`	`pad_to_multiple_of=1024,`
	`71`	`+ seed=42,`
`71`	`72`	`)`
`72`	`73`
`73`	`74`	`def tokenize_function(examples):`