Update Qwen3.5 unit tests: add docstrings and verify weight quantizers are enabled

deepindeed2022 · deepindeed2022 · commit 95b3487ca9c3 · 2026-04-13T01:44:06.000Z
diff --git a/tests/_test_utils/torch/transformers_models.py b/tests/_test_utils/torch/transformers_models.py
@@ -124,6 +124,7 @@ def create_tiny_qwen3_moe_dir(
 
 ##### Qwen3.5 (hybrid linear attention + full attention) #####
 def get_tiny_qwen3_5(**config_kwargs) -> PreTrainedModel:
+    """Create a tiny Qwen3.5 model with hybrid GatedDeltaNet + full attention layers for testing."""
     if Qwen3_5TextConfig is None:
         pytest.skip("Qwen3_5TextConfig not available (requires transformers >= 4.57)")
 
diff --git a/tests/unit/torch/quantization/plugins/test_huggingface.py b/tests/unit/torch/quantization/plugins/test_huggingface.py
@@ -269,6 +269,7 @@ def test_qwen3_5_hybrid_attention_quantize(quant_config):
     quant_cfg["quant_cfg"].append({"quantizer_name": "*in_proj_a*", "enable": False})
 
     def calib_fn(model):
+        """Run calibration forward passes with dummy inputs."""
         x = model.dummy_inputs["input_ids"]
         for _ in range(2):
             model(x)
@@ -285,9 +286,9 @@ def calib_fn(model):
     has_attn_quantized = False
     for name, module in model.named_modules():
         if hasattr(module, "weight_quantizer") and hasattr(module, "weight"):
-            if "linear_attn.in_proj_qkv" in name:
+            if "linear_attn.in_proj_qkv" in name and module.weight_quantizer.is_enabled:
                 has_gdn_quantized = True
-            if "self_attn.q_proj" in name:
+            if "self_attn.q_proj" in name and module.weight_quantizer.is_enabled:
                 has_attn_quantized = True
     assert has_gdn_quantized, "GatedDeltaNet linear layers should be quantized"
     assert has_attn_quantized, "Attention linear layers should be quantized"