Skip to content

Commit f2c2b14

Browse files
authored
add revision pins and edit github actions (#1423)
Signed-off-by: Peter St. John <pstjohn@nvidia.com>
1 parent 7c4ecc7 commit f2c2b14

19 files changed

Lines changed: 75 additions & 50 deletions

.github/workflows/unit-tests-recipes.yml

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,7 @@ jobs:
138138
env:
139139
CI: true
140140
HF_TOKEN: ${{ secrets.HF_TOKEN }}
141+
HF_HOME: /cache/huggingface
141142
strategy:
142143
matrix:
143144
recipe: ${{ fromJson(needs.changed-dirs.outputs.dirs) }}
@@ -155,6 +156,15 @@ jobs:
155156
sparse-checkout: "${{ matrix.recipe.dir }}"
156157
sparse-checkout-cone-mode: false
157158

159+
- name: Cache Hugging Face models
160+
uses: actions/cache@v4
161+
with:
162+
path: /cache/huggingface
163+
key: ${{ runner.os }}-huggingface-${{ matrix.recipe.name }}
164+
restore-keys: |
165+
${{ runner.os }}-huggingface-${{ matrix.recipe.name }}-
166+
${{ runner.os }}-huggingface-
167+
158168
- name: Install dependencies
159169
working-directory: ${{ matrix.recipe.dir }}
160170
run: |
@@ -179,7 +189,6 @@ jobs:
179189
fi
180190
pytest -v .
181191
182-
183192
verify-recipe-tests:
184193
# This job checks the status of the unit-tests matrix and fails if any matrix job failed or was cancelled.
185194
# Use this job as the required check for PRs.

bionemo-recipes/models/amplify/src/amplify/export.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,11 +56,11 @@ def export_hf_checkpoint(tag: str, export_path: Path):
5656
tag: The tag of the checkpoint to export.
5757
export_path: The parent path to export the checkpoint to.
5858
"""
59-
model_hf = AutoModel.from_pretrained(f"chandar-lab/{tag}", trust_remote_code=True)
59+
model_hf = AutoModel.from_pretrained(f"chandar-lab/{tag}", trust_remote_code=True, revision="d918a9e8")
6060
model_te = convert_amplify_hf_to_te(model_hf)
6161
model_te.save_pretrained(export_path / tag)
6262

63-
tokenizer = AutoTokenizer.from_pretrained(f"chandar-lab/{tag}")
63+
tokenizer = AutoTokenizer.from_pretrained(f"chandar-lab/{tag}", revision="d918a9e8")
6464
tokenizer.save_pretrained(export_path / tag)
6565

6666
# Patch the config

bionemo-recipes/models/amplify/tests/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,12 @@ def requires_fp8(func):
3030

3131
@pytest.fixture
3232
def tokenizer():
33-
return AutoTokenizer.from_pretrained("chandar-lab/AMPLIFY_120M")
33+
return AutoTokenizer.from_pretrained("chandar-lab/AMPLIFY_120M", revision="d918a9e8")
3434

3535

3636
@pytest.fixture
3737
def config():
38-
config = AutoConfig.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True)
38+
config = AutoConfig.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True, revision="d918a9e8")
3939
config.dtype = torch.bfloat16
4040
return config
4141

bionemo-recipes/models/amplify/tests/test_amplify_model.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ def test_te_model_has_all_te_layers(config):
6868

6969

7070
def test_models_have_identical_outputs(input_data):
71-
model_hf = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M")
71+
model_hf = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M", revision="d918a9e8")
7272
model_te = convert_amplify_hf_to_te(model_hf)
7373
input_data = {k: v.to("cuda") for k, v in input_data.items()}
7474

@@ -85,7 +85,7 @@ def test_models_have_identical_outputs(input_data):
8585

8686

8787
def test_converted_model_roundtrip(input_data, tmp_path):
88-
model_hf = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M")
88+
model_hf = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M", revision="d918a9e8")
8989
model_te = convert_amplify_hf_to_te(model_hf)
9090

9191
model_te.save_pretrained(tmp_path / "AMPLIFY_120M")
@@ -108,7 +108,7 @@ def test_converted_model_roundtrip(input_data, tmp_path):
108108

109109

110110
def test_convert_state_dict():
111-
model_hf = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M")
111+
model_hf = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M", revision="d918a9e8")
112112
model_te = convert_amplify_hf_to_te(model_hf)
113113

114114
from amplify.state_dict_convert import _pack_qkv_weight, _pad_bias, _pad_weights, mapping
@@ -171,7 +171,7 @@ def test_convert_state_dict():
171171

172172

173173
def test_hf_trained_model_loss(input_data):
174-
model = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M")
174+
model = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M", revision="d918a9e8")
175175
model.to("cuda", dtype=torch.bfloat16)
176176
input_data = {k: v.to("cuda") for k, v in input_data.items()}
177177
model.eval()
@@ -182,7 +182,7 @@ def test_hf_trained_model_loss(input_data):
182182

183183

184184
def test_te_trained_model_loss(input_data):
185-
model_hf = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M")
185+
model_hf = amp_hf.AMPLIFY.from_pretrained("chandar-lab/AMPLIFY_120M", revision="d918a9e8")
186186
model = convert_amplify_hf_to_te(model_hf)
187187
model.to("cuda", dtype=torch.bfloat16)
188188
input_data = {k: v.to("cuda") for k, v in input_data.items()}
@@ -194,7 +194,7 @@ def test_te_trained_model_loss(input_data):
194194

195195

196196
def test_hf_reinitialized_model_loss(input_data):
197-
config = amp_hf.AMPLIFYConfig.from_pretrained("chandar-lab/AMPLIFY_120M")
197+
config = amp_hf.AMPLIFYConfig.from_pretrained("chandar-lab/AMPLIFY_120M", revision="d918a9e8")
198198
model = amp_hf.AMPLIFY(config)
199199
model.to("cuda", dtype=torch.bfloat16)
200200
input_data = {k: v.to("cuda") for k, v in input_data.items()}
@@ -207,7 +207,7 @@ def test_hf_reinitialized_model_loss(input_data):
207207

208208

209209
def test_te_reinitialized_model_loss(input_data):
210-
config = amp_te.AMPLIFYConfig.from_pretrained("chandar-lab/AMPLIFY_120M")
210+
config = amp_te.AMPLIFYConfig.from_pretrained("chandar-lab/AMPLIFY_120M", revision="d918a9e8")
211211
model = amp_te.AMPLIFYForMaskedLM(config)
212212
model.to("cuda", dtype=torch.bfloat16)
213213
input_data = {k: v.to("cuda") for k, v in input_data.items()}

bionemo-recipes/models/amplify/tests/test_encoder_block.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ def data(self) -> torch.Tensor:
5656

5757
@pytest.fixture
5858
def config():
59-
config = AutoConfig.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True)
59+
config = AutoConfig.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True, revision="d918a9e8")
6060
config.dtype = torch.bfloat16
6161
return config
6262

bionemo-recipes/models/amplify/tests/test_rotary_embeddings.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def test_apply_rotary_pos_emb():
2929
key = torch.randn([2, 72, 10, 64], dtype=torch.bfloat16, generator=rng).to("cuda")
3030

3131
# AMPLIFY HF Rope
32-
hf_config = AutoConfig.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True)
32+
hf_config = AutoConfig.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True, revision="d918a9e8")
3333

3434
freqs_cis = precompute_freqs_cis(hf_config.hidden_size // hf_config.num_attention_heads, 72).to("cuda")
3535
q_post, k_post = apply_rotary_emb(query, key, freqs_cis)

bionemo-recipes/models/esm2/tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ def input_data(tokenizer, tokenized_proteins):
8686

8787
@pytest.fixture
8888
def te_model_checkpoint(tmp_path):
89-
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D")
89+
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D", revision="c731040f")
9090
model_te = convert_esm_hf_to_te(model_hf)
9191
model_te.save_pretrained(tmp_path / "te_model_checkpoint")
9292
return tmp_path / "te_model_checkpoint"

bionemo-recipes/models/esm2/tests/test_convert.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ def test_convert_te_to_hf_roundtrip():
2222
"""Test that converting HF -> TE -> HF produces the same model."""
2323
from esm.convert import convert_esm_hf_to_te, convert_esm_te_to_hf
2424

25-
model_hf_original = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D")
25+
model_hf_original = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D", revision="c731040f")
2626

2727
model_te = convert_esm_hf_to_te(model_hf_original)
2828
model_hf_converted = convert_esm_te_to_hf(model_te)
@@ -42,7 +42,7 @@ def test_qkv_unpacking():
4242
"""Test that QKV unpacking works correctly."""
4343
from esm.convert import convert_esm_hf_to_te, convert_esm_te_to_hf
4444

45-
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D")
45+
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D", revision="c731040f")
4646
model_te = convert_esm_hf_to_te(model_hf)
4747
model_hf_converted = convert_esm_te_to_hf(model_te)
4848

@@ -64,7 +64,7 @@ def test_config_conversion():
6464
"""Test that config conversion works correctly."""
6565
from esm.convert import convert_esm_hf_to_te, convert_esm_te_to_hf
6666

67-
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D")
67+
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D", revision="c731040f")
6868
model_te = convert_esm_hf_to_te(model_hf)
6969
model_hf_converted = convert_esm_te_to_hf(model_te)
7070

@@ -97,7 +97,7 @@ def test_padding_unpadding_operations():
9797
"""Test that padding and unpadding operations work correctly for embeddings and decoder weights."""
9898
from esm.convert import convert_esm_hf_to_te, convert_esm_te_to_hf
9999

100-
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D")
100+
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D", revision="c731040f")
101101
model_te = convert_esm_hf_to_te(model_hf)
102102
model_hf_converted = convert_esm_te_to_hf(model_te)
103103

@@ -146,7 +146,7 @@ def test_weight_initialization_matches_hf():
146146

147147
set_seed(42)
148148

149-
config_hf = AutoConfig.from_pretrained("facebook/esm2_t6_8M_UR50D", vocab_size=64)
149+
config_hf = AutoConfig.from_pretrained("facebook/esm2_t6_8M_UR50D", vocab_size=64, revision="c731040f")
150150
model_hf = EsmForMaskedLM(config_hf)
151151
model_te_converted = convert_esm_hf_to_te(model_hf)
152152

bionemo-recipes/models/esm2/tests/test_cp_bshd.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ def get_te_model_checkpoint(tmp_path):
7272
Returns:
7373
The path to the saved model checkpoint.
7474
"""
75-
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D")
75+
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D", revision="c731040f")
7676
model_te = convert_esm_hf_to_te(model_hf, attn_mask_type="no_mask", attn_input_format="bshd")
7777
model_te.save_pretrained(tmp_path / "te_model_checkpoint")
7878
return tmp_path / "te_model_checkpoint"
@@ -183,7 +183,7 @@ def test_context_parallel_equivalence_2process():
183183
model_ckpt = get_te_model_checkpoint(tmp_path)
184184

185185
# Create tokenizer for real protein sequences
186-
tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D")
186+
tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D", revision="c731040f")
187187
input_data_bshd_padded_dp0 = get_dummy_data_bshd_with_padding_dp0(tokenizer=tokenizer)
188188

189189
model = NVEsmForMaskedLM.from_pretrained(

bionemo-recipes/models/esm2/tests/test_cp_thd.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ def get_te_model_checkpoint(tmp_path):
8282
Returns:
8383
The path to the saved model checkpoint.
8484
"""
85-
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D")
85+
model_hf = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D", revision="c731040f")
8686
model_te = convert_esm_hf_to_te(model_hf)
8787
model_te.save_pretrained(tmp_path / "te_model_checkpoint")
8888
return tmp_path / "te_model_checkpoint"
@@ -174,7 +174,7 @@ def test_context_parallel_equivalence_2process():
174174
model_ckpt = get_te_model_checkpoint(tmp_path)
175175

176176
# Create tokenizer for real protein sequences
177-
tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D")
177+
tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D", revision="c731040f")
178178
input_data_thd_padded_dp0 = get_dummy_data_thd_with_padding_dp0(tokenizer)
179179

180180
model = NVEsmForMaskedLM.from_pretrained(

0 commit comments

Comments
 (0)