|
40 | 40 | SEED = 1234 |
41 | 41 |
|
42 | 42 |
|
# Qwen3.5 config classes only exist in recent transformers releases.
# Fall back to None so the tiny-model factories below can pytest.skip
# gracefully instead of crashing at import time on older installs.
try:
    from transformers import Qwen3_5TextConfig
except ImportError:
    Qwen3_5TextConfig = None

try:
    from transformers import Qwen3_5MoeTextConfig
except ImportError:
    Qwen3_5MoeTextConfig = None
| 53 | + |
##### Qwen3.5 Dense #####
def get_tiny_qwen3_5(**config_kwargs) -> PreTrainedModel:
    """Create a tiny Qwen3.5 Dense model (hybrid GatedDeltaNet + Softmax attention).

    Requires ``transformers`` with ``Qwen3_5TextConfig`` support; skips the
    calling test otherwise.

    Args:
        **config_kwargs: Config overrides merged on top of the tiny defaults.

    Returns:
        A bfloat16 ``PreTrainedModel`` instantiated from the tiny config.
    """
    if Qwen3_5TextConfig is None:
        pytest.skip("transformers does not have Qwen3_5TextConfig")

    set_seed(SEED)  # deterministic weights across test runs
    kwargs = {
        "hidden_size": 32,
        "intermediate_size": 32,
        "num_hidden_layers": 4,
        "num_attention_heads": 4,
        "num_key_value_heads": 2,
        "max_position_embeddings": 64,
        "vocab_size": 32,
        "head_dim": 8,
        "short_chunk_size": 32,
        # Per-layer attention-type flags for the 4 layers (hybrid architecture);
        # the meaning of 0 vs 1 is defined by Qwen3_5TextConfig.
        "attn_type": [0, 0, 0, 1],
    }
    # Pass the mapping directly: update(**d) re-splats and would fail on
    # non-identifier keys; update(d) is the idiomatic equivalent.
    kwargs.update(config_kwargs)
    config = Qwen3_5TextConfig(**kwargs)
    tiny_model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.bfloat16)
    return tiny_model
| 80 | + |
| 81 | + |
def create_tiny_qwen3_5_dir(
    tmp_path: Path | str, with_tokenizer: bool = False, return_model: bool = False, **config_kwargs
) -> Path | tuple[Path, PreTrainedModel]:
    """Persist a tiny Qwen3.5 Dense checkpoint under *tmp_path* for tests.

    Optionally saves a throwaway tokenizer next to it (syncing the model's
    vocab size to the tokenizer's), and optionally returns the in-memory
    model alongside the checkpoint directory.
    """
    target = Path(tmp_path) / "tiny_qwen3_5"
    if with_tokenizer:
        tok = AutoTokenizer.from_pretrained(
            "hf-internal-testing/tiny-random-LlamaForCausalLM"
        )
        tok.save_pretrained(target)
        # Keep the model's vocabulary consistent with the saved tokenizer.
        config_kwargs["vocab_size"] = tok.vocab_size
    model = get_tiny_qwen3_5(**config_kwargs)
    model.save_pretrained(target)
    return (target, model) if return_model else target
| 99 | + |
| 100 | + |
##### Qwen3.5 MoE #####
def get_tiny_qwen3_5_moe(**config_kwargs) -> PreTrainedModel:
    """Create a tiny Qwen3.5 MoE model (hybrid attention + mixture-of-experts).

    Requires ``transformers`` with ``Qwen3_5MoeTextConfig`` support; skips the
    calling test otherwise.

    Args:
        **config_kwargs: Config overrides merged on top of the tiny defaults.

    Returns:
        A bfloat16 ``PreTrainedModel`` instantiated from the tiny config.
    """
    if Qwen3_5MoeTextConfig is None:
        pytest.skip("transformers does not have Qwen3_5MoeTextConfig")

    set_seed(SEED)  # deterministic weights across test runs
    kwargs = {
        "hidden_size": 32,
        "intermediate_size": 32,
        "moe_intermediate_size": 32,
        "num_hidden_layers": 4,
        "num_attention_heads": 4,
        "num_key_value_heads": 2,
        "max_position_embeddings": 64,
        "vocab_size": 32,
        "head_dim": 8,
        "short_chunk_size": 32,
        # Per-layer attention-type flags for the 4 layers (hybrid architecture);
        # the meaning of 0 vs 1 is defined by Qwen3_5MoeTextConfig.
        "attn_type": [0, 0, 0, 1],
        "num_experts": 4,
        "num_experts_per_tok": 2,
        # decoder_sparse_step=1 makes every decoder layer a sparse (MoE) layer.
        "decoder_sparse_step": 1,
    }
    # Pass the mapping directly: update(**d) re-splats and would fail on
    # non-identifier keys; update(d) is the idiomatic equivalent.
    kwargs.update(config_kwargs)
    config = Qwen3_5MoeTextConfig(**kwargs)
    tiny_model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.bfloat16)
    return tiny_model
| 131 | + |
| 132 | + |
def create_tiny_qwen3_5_moe_dir(
    tmp_path: Path | str, with_tokenizer: bool = False, return_model: bool = False, **config_kwargs
) -> Path | tuple[Path, PreTrainedModel]:
    """Save a tiny Qwen3.5 MoE model to disk for testing.

    Args:
        tmp_path: Base directory; the checkpoint is written to
            ``tmp_path/tiny_qwen3_5_moe``.
        with_tokenizer: Also save a tiny tokenizer and sync the model's
            vocab size to it.
        return_model: When True, also return the in-memory model —
            backward-compatible addition for parity with
            ``create_tiny_qwen3_5_dir``.
        **config_kwargs: Config overrides forwarded to ``get_tiny_qwen3_5_moe``.

    Returns:
        The checkpoint directory, or ``(directory, model)`` when
        ``return_model`` is True.
    """
    model_dir = Path(tmp_path) / "tiny_qwen3_5_moe"
    if with_tokenizer:
        tokenizer = AutoTokenizer.from_pretrained(
            "hf-internal-testing/tiny-random-LlamaForCausalLM"
        )
        tokenizer.save_pretrained(model_dir)
        # Keep the model's vocabulary consistent with the saved tokenizer.
        config_kwargs["vocab_size"] = tokenizer.vocab_size
    tiny_model = get_tiny_qwen3_5_moe(**config_kwargs)
    tiny_model.save_pretrained(model_dir)

    if return_model:
        return model_dir, tiny_model
    return model_dir
| 146 | + |
| 147 | + |
43 | 148 | ##### Qwen3 ##### |
44 | 149 | def get_tiny_qwen3(**config_kwargs) -> PreTrainedModel: |
45 | 150 | set_seed(SEED) |
|
0 commit comments