rename torch_dtype to dtype

pstjohn · pstjohn · commit ce5630fc64fe · 2025-09-10T07:57:10.000-07:00
Signed-off-by: Peter St. John <pstjohn@nvidia.com>
diff --git a/models/amplify/export.py b/models/amplify/export.py
@@ -36,7 +36,7 @@
     # Smoke test that the model can be loaded.
     model_te = AutoModelForMaskedLM.from_pretrained(
         f"./checkpoint_export/{tag}",
-        torch_dtype=torch.bfloat16,
+        dtype=torch.bfloat16,
         trust_remote_code=True,
     )
     del model_te
diff --git a/models/amplify/src/amplify/amplify_te.py b/models/amplify/src/amplify/amplify_te.py
@@ -147,17 +147,15 @@ def __init__(self, config: AMPLIFYConfig, **kwargs):
             config.padded_vocab_size,
             config.hidden_size,
             padding_idx=config.pad_token_id,
-            dtype=config.torch_dtype,
+            dtype=config.dtype,
         )
 
         if config.layer_norm_after_embedding:
             self.layer_norm_1 = (
-                transformer_engine.pytorch.RMSNorm(
-                    config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
-                )
+                transformer_engine.pytorch.RMSNorm(config.hidden_size, config.norm_eps, params_dtype=config.dtype)
                 if config.rms_norm
                 else transformer_engine.pytorch.LayerNorm(
-                    config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
+                    config.hidden_size, config.norm_eps, params_dtype=config.dtype
                 )
             )
 
@@ -197,7 +195,7 @@ def __init__(self, config: AMPLIFYConfig, **kwargs):
                     window_size=(-1, -1),
                     rotary_pos_interleaved=True,
                     seq_length=config.max_length,
-                    params_dtype=config.torch_dtype,
+                    params_dtype=config.dtype,
                 )
             )
 
@@ -278,7 +276,7 @@ def __init__(self, config: AMPLIFYConfig, **kwargs):
                 config.hidden_size,
                 config.padded_vocab_size,
                 config.norm_eps,
-                params_dtype=config.torch_dtype,
+                params_dtype=config.dtype,
                 normalization="RMSNorm" if config.rms_norm else "LayerNorm",
                 init_method=lambda x: torch.nn.init.uniform_(
                     x, -self.config.decoder_init_range, self.config.decoder_init_range
@@ -287,7 +285,7 @@ def __init__(self, config: AMPLIFYConfig, **kwargs):
 
         else:
             self.decoder = transformer_engine.pytorch.Linear(
-                config.hidden_size, config.vocab_size, params_dtype=config.torch_dtype
+                config.hidden_size, config.vocab_size, params_dtype=config.dtype
             )
 
     def forward(
diff --git a/models/amplify/src/amplify/state_dict_convert.py b/models/amplify/src/amplify/state_dict_convert.py
@@ -46,7 +46,7 @@ def convert_amplify_hf_to_te(model_hf: nn.Module, **config_kwargs) -> nn.Module:
     """
     te_config = AMPLIFYConfig(**model_hf.config.to_dict(), **config_kwargs)
     with init_empty_weights():
-        model_te = AMPLIFYForMaskedLM(te_config, torch_dtype=te_config.torch_dtype)
+        model_te = AMPLIFYForMaskedLM(te_config, dtype=te_config.dtype)
 
     output_model = io.apply_transforms(
         model_hf,
diff --git a/models/amplify/tests/conftest.py b/models/amplify/tests/conftest.py
@@ -36,7 +36,7 @@ def tokenizer():
 @pytest.fixture
 def config():
     config = AutoConfig.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True)
-    config.torch_dtype = torch.bfloat16
+    config.dtype = torch.bfloat16
     return config
 
 
diff --git a/models/amplify/tests/test_encoder_block.py b/models/amplify/tests/test_encoder_block.py
@@ -57,7 +57,7 @@ def data(self) -> torch.Tensor:
 @pytest.fixture
 def config():
     config = AutoConfig.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True)
-    config.torch_dtype = torch.bfloat16
+    config.dtype = torch.bfloat16
     return config
 
 
@@ -169,7 +169,7 @@ def test_encoder_block_forward(inputs, config):
         window_size=(-1, -1),
         rotary_pos_interleaved=True,
         seq_length=config.max_length,
-        params_dtype=config.torch_dtype,
+        params_dtype=config.dtype,
     ).to("cuda", dtype=torch.bfloat16)
 
     state_dict_mapping = {

Original file line number	Diff line number	Diff line change
`@@ -36,7 +36,7 @@`
`36`	`36`	`# Smoke test that the model can be loaded.`
`37`	`37`	`model_te = AutoModelForMaskedLM.from_pretrained(`
`38`	`38`	`f"./checkpoint_export/{tag}",`
`39`		`- torch_dtype=torch.bfloat16,`
	`39`	`+ dtype=torch.bfloat16,`
`40`	`40`	`trust_remote_code=True,`
`41`	`41`	`)`
`42`	`42`	`del model_te`