|
| 1 | +# SPDX-FileCopyrightText: 2026 ModelCloud.ai |
| 2 | +# SPDX-License-Identifier: Apache-2.0 |
| 3 | + |
| 4 | +from __future__ import annotations |
| 5 | + |
| 6 | +import json |
| 7 | + |
| 8 | +from transformers import AutoConfig |
| 9 | + |
| 10 | +from gptqmodel.models.auto import check_and_get_model_definition |
| 11 | +from gptqmodel.models.definitions.qwen3_5_moe import Qwen3_5_MoeQModel |
| 12 | + |
| 13 | + |
def test_qwen3_6_moe_reuses_the_qwen3_5_moe_transformers_definition(tmp_path):
    """Check that a Qwen 3.6 MoE config resolves to the Qwen 3.5 MoE definition.

    A ``config.json`` modelled on the Qwen 3.6 MoE config shape shipped on the
    Hub is written under ``tmp_path`` and then resolved, with
    ``trust_remote_code=False``, through both ``AutoConfig.from_pretrained``
    and ``check_and_get_model_definition``.
    """
    # Three linear-attention layers followed by one full-attention layer,
    # repeated ten times to match the 40 hidden layers declared below.
    attention_pattern = ["linear_attention"] * 3 + ["full_attention"]
    layer_types = attention_pattern * 10

    rope_parameters = {
        "mrope_interleaved": True,
        "mrope_section": [11, 11, 10],
        "partial_rotary_factor": 0.25,
        "rope_theta": 10000000,
        "rope_type": "default",
    }
    text_config = {
        "dtype": "bfloat16",
        "full_attention_interval": 4,
        "hidden_size": 2048,
        "layer_types": layer_types,
        "max_position_embeddings": 262144,
        "model_type": "qwen3_5_moe_text",
        "moe_intermediate_size": 512,
        "num_attention_heads": 16,
        "num_experts": 256,
        "num_experts_per_tok": 8,
        "num_hidden_layers": 40,
        "num_key_value_heads": 2,
        "partial_rotary_factor": 0.25,
        "rope_parameters": rope_parameters,
        "shared_expert_intermediate_size": 512,
        "tie_word_embeddings": False,
        "use_cache": True,
        "vocab_size": 248320,
    }
    vision_config = {
        "deepstack_visual_indexes": [],
        "depth": 27,
        "hidden_size": 1152,
        "in_channels": 3,
        "intermediate_size": 4304,
        "model_type": "qwen3_5_moe",
        "num_heads": 16,
        "num_position_embeddings": 2304,
        "out_hidden_size": 2048,
        "patch_size": 16,
        "spatial_merge_size": 2,
        "temporal_patch_size": 2,
    }
    # Top-level keys stay in the original order so the serialized JSON is unchanged.
    hub_config = {
        "architectures": ["Qwen3_5MoeForConditionalGeneration"],
        "image_token_id": 248056,
        "model_type": "qwen3_5_moe",
        "text_config": text_config,
        "tie_word_embeddings": False,
        "transformers_version": "4.57.1",
        "video_token_id": 248057,
        "vision_config": vision_config,
        "vision_end_token_id": 248054,
        "vision_start_token_id": 248053,
    }

    # Materialize a checkpoint directory that contains nothing but the config.
    checkpoint_dir = tmp_path / "qwen3_6_moe"
    checkpoint_dir.mkdir()
    config_file = checkpoint_dir / "config.json"
    config_file.write_text(json.dumps(hub_config), encoding="utf-8")

    resolved_config = AutoConfig.from_pretrained(checkpoint_dir, trust_remote_code=False)
    model_definition = check_and_get_model_definition(checkpoint_dir, trust_remote_code=False)

    # The config class is compared by name; the model definition by identity.
    resolved_config_name = type(resolved_config).__name__
    assert resolved_config_name == "Qwen3_5MoeConfig"
    assert model_definition is Qwen3_5_MoeQModel
0 commit comments