|
20 | 20 | import torch |
21 | 21 | from transformer_engine.pytorch.attention import InferenceParams |
22 | 22 | from transformers import ( |
| 23 | + AutoConfig, |
23 | 24 | AutoModelForCausalLM, |
24 | 25 | AutoTokenizer, |
25 | 26 | DataCollatorWithFlattening, |
@@ -111,11 +112,12 @@ def test_llama_model_forward_pass_thd_inputs(input_text): |
111 | 112 | @pytest.mark.parametrize( |
112 | 113 | "upstream_model_name", ["meta-llama/Llama-3.2-1B-Instruct", "meta-llama/Llama-3.1-8B-Instruct"] |
113 | 114 | ) |
114 | | -def test_llama_model_golden_values(input_text, upstream_model_name: str): |
| 115 | +@pytest.mark.parametrize("attn_input_format", ["thd", "bshd"]) |
| 116 | +def test_llama_model_golden_values(input_text, upstream_model_name: str, attn_input_format: str): |
115 | 117 | tokenizer = AutoTokenizer.from_pretrained(upstream_model_name) |
116 | 118 | model_hf = AutoModelForCausalLM.from_pretrained(upstream_model_name, dtype=torch.bfloat16) |
117 | 119 |
|
118 | | - model_te = convert_llama_hf_to_te(model_hf) |
| 120 | + model_te = convert_llama_hf_to_te(model_hf, attn_input_format=attn_input_format) |
119 | 121 |
|
120 | 122 | tokenizer.pad_token = tokenizer.eos_token |
121 | 123 | # TODO: figure out padding_side="left" with TE, make this several tests with different input types. |
@@ -370,3 +372,67 @@ def test_te_llama_model_generate_with_cache_bshd_beam_search(): |
370 | 372 | generated_text = tokenizer.batch_decode(output_ids, skip_special_tokens=True) |
371 | 373 | assert "http://www.apache.org/licenses/LICENSE-2.0" in generated_text[0] |
372 | 374 | assert "et dolore magna aliqua. Ut enim ad minim " in generated_text[1] |
| 375 | + |
| 376 | + |
@pytest.mark.parametrize("attn_input_format", ["thd", "bshd"])
def test_loss_with_random_weights_for_input_gene_sequence(recipe_path, attn_input_format: str):
    """Compare the losses of independently, randomly initialized HF and TE Llama models on one DNA sequence.

    No weights are copied between the two models, so their losses are only required to agree within the
    loose tolerance passed to ``assert_close`` at the end of the test.

    Args:
        recipe_path: Path-like base directory containing ``nucleotide_fast_tokenizer`` (presumably a pytest
            fixture defined elsewhere).
        attn_input_format: Attention input layout handed to ``NVLlamaConfig`` ("thd" or "bshd").
    """
    # This unsloth config is identical to the meta-llama/Llama-3.2-1B config, but is available in CI without having to
    # sign the EULA. Since we don't need any weights here, we can just use this model tag instead.
    config_tag = "unsloth/Llama-3.2-1B-Instruct"
    sequence = "GCACGGTCTGCACCACCGTCTGCCCGGTCAGCGGCGTTAACCCGCGCTATCCCGGTCCGAAACAGGCCGGGCCGGACGGCGAGCGCCTTCGTCTGAAGGA"

    nucleotide_tokenizer = AutoTokenizer.from_pretrained(recipe_path / "nucleotide_fast_tokenizer")
    encoded = nucleotide_tokenizer(sequence, return_tensors="pt")
    batch = {name: tensor.to("cuda") for name, tensor in encoded.items()}
    # The input ids double as the labels for the causal-LM loss.
    targets = batch["input_ids"].clone()

    # Reference loss from a randomly initialized Hugging Face model.
    reference_config = AutoConfig.from_pretrained(config_tag)
    reference_model = AutoModelForCausalLM.from_config(reference_config)
    reference_model.to("cuda")
    with torch.no_grad():
        reference_outputs = reference_model(**batch, labels=targets, output_hidden_states=True)
    reference_loss = reference_outputs.loss

    # Drop the reference model and release cached GPU memory before the TE model is built.
    del reference_model
    gc.collect()
    torch.cuda.empty_cache()

    # Loss from a randomly initialized TE model using the requested attention input format.
    te_config = NVLlamaConfig.from_pretrained(config_tag, attn_input_format=attn_input_format)
    te_model = NVLlamaForCausalLM(te_config)
    te_model.to("cuda")
    with torch.no_grad():
        te_outputs = te_model(**batch, labels=targets, output_hidden_states=True)
    te_loss = te_outputs.loss

    torch.testing.assert_close(te_loss, reference_loss, atol=0.5, rtol=0.05)
| 409 | + |
| 410 | + |
@pytest.mark.parametrize("attn_input_format", ["thd", "bshd"])
def test_loss_with_random_weights_similar_grad_norms(recipe_path, attn_input_format: str):
    """Check that a randomly initialized HF Llama and its TE conversion yield matching gradient norms.

    The TE model is produced from the HF model by ``convert_llama_hf_to_te``. Each model then runs one
    training-mode forward/backward pass on the same DNA sequence, and the total gradient norms are compared
    with the default ``assert_close`` tolerances.

    Args:
        recipe_path: Path-like base directory containing ``nucleotide_fast_tokenizer`` (presumably a pytest
            fixture defined elsewhere).
        attn_input_format: Attention input layout forwarded to ``convert_llama_hf_to_te`` ("thd" or "bshd").
    """
    sequence = "GCACGGTCTGCACCACCGTCTGCCCGGTCAGCGGCGTTAACCCGCGCTATCCCGGTCCGAAACAGGCCGGGCCGGACGGCGAGCGCCTTCGTCTGAAGGA"

    nucleotide_tokenizer = AutoTokenizer.from_pretrained(recipe_path / "nucleotide_fast_tokenizer")
    encoded = nucleotide_tokenizer(sequence, return_tensors="pt")
    batch = {name: tensor.to("cuda") for name, tensor in encoded.items()}
    # The input ids double as the labels for the causal-LM loss.
    targets = batch["input_ids"].clone()

    def total_grad_norm(model):
        """Move ``model`` to the GPU, run one training step's forward/backward, and return its gradient norm."""
        model.to("cuda")
        model.train()
        outputs = model(**batch, labels=targets, output_hidden_states=True)
        outputs.loss.backward()
        # max_norm=inf: the call is used only for the total norm it returns.
        return torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=float("inf"))

    # Build the HF model and convert it to TE before either model is moved to the GPU.
    reference_config = AutoConfig.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
    reference_model = AutoModelForCausalLM.from_config(reference_config)
    te_model = convert_llama_hf_to_te(reference_model, attn_input_format=attn_input_format)

    reference_norm = total_grad_norm(reference_model)
    te_norm = total_grad_norm(te_model)

    torch.testing.assert_close(te_norm, reference_norm)
0 commit comments