Skip to content

Commit b6600a5

Browse files
committed
Switch deprecated torch_dtype parameter to dtype
Signed-off-by: Peter St. John <pstjohn@nvidia.com>
1 parent 0d30652 commit b6600a5

17 files changed

Lines changed: 36 additions & 38 deletions

File tree

models/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ def convert_hf_to_te(model_hf: nn.Module, **config_kwargs) -> nn.Module:
111111
"""Convert HuggingFace model to TransformerEngine format."""
112112
te_config = MyModelTEConfig(**model_hf.config.to_dict(), **config_kwargs)
113113
with init_empty_weights():
114-
model_te = MyModelTE(te_config, torch_dtype=te_config.torch_dtype)
114+
model_te = MyModelTE(te_config, dtype=te_config.dtype)
115115

116116
output_model = io.apply_transforms(model_hf, model_te, ...)
117117
return output_model

models/amplify/export.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
# Smoke test that the model can be loaded.
3737
model_te = AutoModelForMaskedLM.from_pretrained(
3838
f"./checkpoint_export/{tag}",
39-
torch_dtype=torch.bfloat16,
39+
dtype=torch.bfloat16,
4040
trust_remote_code=True,
4141
)
4242
del model_te

models/amplify/src/amplify/amplify_te.py

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -147,17 +147,15 @@ def __init__(self, config: AMPLIFYConfig, **kwargs):
147147
config.padded_vocab_size,
148148
config.hidden_size,
149149
padding_idx=config.pad_token_id,
150-
dtype=config.torch_dtype,
150+
dtype=config.dtype,
151151
)
152152

153153
if config.layer_norm_after_embedding:
154154
self.layer_norm_1 = (
155-
transformer_engine.pytorch.RMSNorm(
156-
config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
157-
)
155+
transformer_engine.pytorch.RMSNorm(config.hidden_size, config.norm_eps, params_dtype=config.dtype)
158156
if config.rms_norm
159157
else transformer_engine.pytorch.LayerNorm(
160-
config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
158+
config.hidden_size, config.norm_eps, params_dtype=config.dtype
161159
)
162160
)
163161

@@ -194,7 +192,7 @@ def __init__(self, config: AMPLIFYConfig, **kwargs):
194192
window_size=(-1, -1),
195193
rotary_pos_interleaved=True,
196194
seq_length=config.max_length,
197-
params_dtype=config.torch_dtype,
195+
params_dtype=config.dtype,
198196
)
199197
)
200198

@@ -277,7 +275,7 @@ def __init__(self, config: AMPLIFYConfig, **kwargs):
277275
config.hidden_size,
278276
config.padded_vocab_size,
279277
config.norm_eps,
280-
params_dtype=config.torch_dtype,
278+
params_dtype=config.dtype,
281279
normalization="RMSNorm" if config.rms_norm else "LayerNorm",
282280
init_method=lambda x: torch.nn.init.uniform_(
283281
x, -self.config.decoder_init_range, self.config.decoder_init_range
@@ -286,7 +284,7 @@ def __init__(self, config: AMPLIFYConfig, **kwargs):
286284

287285
else:
288286
self.decoder = transformer_engine.pytorch.Linear(
289-
config.hidden_size, config.vocab_size, params_dtype=config.torch_dtype
287+
config.hidden_size, config.vocab_size, params_dtype=config.dtype
290288
)
291289

292290
def forward(

models/amplify/src/amplify/state_dict_convert.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ def convert_amplify_hf_to_te(model_hf: nn.Module, **config_kwargs) -> nn.Module:
4646
"""
4747
te_config = AMPLIFYConfig(**model_hf.config.to_dict(), **config_kwargs)
4848
with init_empty_weights():
49-
model_te = AMPLIFYForMaskedLM(te_config, torch_dtype=te_config.torch_dtype)
49+
model_te = AMPLIFYForMaskedLM(te_config, dtype=te_config.dtype)
5050

5151
output_model = io.apply_transforms(
5252
model_hf,

models/amplify/tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ def tokenizer():
3636
@pytest.fixture
3737
def config():
3838
config = AutoConfig.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True)
39-
config.torch_dtype = torch.bfloat16
39+
config.dtype = torch.bfloat16
4040
return config
4141

4242

models/amplify/tests/test_encoder_block.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ def data(self) -> torch.Tensor:
5757
@pytest.fixture
5858
def config():
5959
config = AutoConfig.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True)
60-
config.torch_dtype = torch.bfloat16
60+
config.dtype = torch.bfloat16
6161
return config
6262

6363

@@ -169,7 +169,7 @@ def test_encoder_block_forward(inputs, config):
169169
window_size=(-1, -1),
170170
rotary_pos_interleaved=True,
171171
seq_length=config.max_length,
172-
params_dtype=config.torch_dtype,
172+
params_dtype=config.dtype,
173173
).to("cuda", dtype=torch.bfloat16)
174174

175175
state_dict_mapping = {

models/esm2/src/esm/export.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ def export_hf_checkpoint(tag: str, export_path: Path):
6464
# Smoke test that the model can be loaded.
6565
model_te = AutoModelForMaskedLM.from_pretrained(
6666
export_path / tag,
67-
torch_dtype=torch.bfloat16,
67+
dtype=torch.bfloat16,
6868
trust_remote_code=True,
6969
)
7070
del model_te

models/esm2/src/esm/modeling_esm_te.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,7 @@ def __init__(self, config: NVEsmConfig):
129129
micro_batch_size=config.micro_batch_size,
130130
num_gqa_groups=config.num_attention_heads,
131131
fuse_qkv_params=config.fuse_qkv_params,
132-
params_dtype=config.torch_dtype,
132+
params_dtype=config.dtype,
133133
window_size=(-1, -1),
134134
)
135135
for i in range(config.num_hidden_layers)

models/esm2/tests/test_distributed_strategies.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -145,14 +145,14 @@ def run_forward_backward(use_te: bool, strategy: Strategy, input_data: dict, dis
145145
if use_te:
146146
model = AutoModelForMaskedLM.from_pretrained(
147147
"nvidia/esm2_t6_8M_UR50D",
148-
torch_dtype=torch.bfloat16,
148+
dtype=torch.bfloat16,
149149
trust_remote_code=True,
150150
)
151151
transformer_layers = model.esm.encoder.layers
152152
else:
153153
model = AutoModelForMaskedLM.from_pretrained(
154154
"facebook/esm2_t6_8M_UR50D",
155-
torch_dtype=torch.bfloat16,
155+
dtype=torch.bfloat16,
156156
)
157157
transformer_layers = model.esm.encoder.layer
158158
del model.esm.contact_head # Unused in backwards pass.

recipes/amplify_accelerate_te_fp8/train.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ def main(args: DictConfig):
4141
model = AutoModelForMaskedLM.from_config(
4242
config,
4343
trust_remote_code=True,
44-
torch_dtype=torch.bfloat16,
44+
dtype=torch.bfloat16,
4545
)
4646

4747
train_dataset, eval_dataset, data_collator = create_datasets_and_collator(

0 commit comments

Comments
 (0)