File tree (expand / collapse): 2 files changed, +4 -2 lines changed
unit/torch/quantization/plugins (expand / collapse file tree): 2 files changed, +4 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,7 @@ def create_tiny_qwen3_moe_dir(
124124
125125##### Qwen3.5 (hybrid linear attention + full attention) #####
126126def get_tiny_qwen3_5 (** config_kwargs ) -> PreTrainedModel :
127+ """Create a tiny Qwen3.5 model with hybrid GatedDeltaNet + full attention layers for testing."""
127128 if Qwen3_5TextConfig is None :
128129 pytest .skip ("Qwen3_5TextConfig not available (requires transformers >= 4.57)" )
129130
Original file line number | Diff line number | Diff line change
@@ -269,6 +269,7 @@ def test_qwen3_5_hybrid_attention_quantize(quant_config):
269269 quant_cfg ["quant_cfg" ].append ({"quantizer_name" : "*in_proj_a*" , "enable" : False })
270270
271271 def calib_fn (model ):
272+ """Run calibration forward passes with dummy inputs."""
272273 x = model .dummy_inputs ["input_ids" ]
273274 for _ in range (2 ):
274275 model (x )
@@ -285,9 +286,9 @@ def calib_fn(model):
285286 has_attn_quantized = False
286287 for name , module in model .named_modules ():
287288 if hasattr (module , "weight_quantizer" ) and hasattr (module , "weight" ):
288- if "linear_attn.in_proj_qkv" in name :
289+ if "linear_attn.in_proj_qkv" in name and module . weight_quantizer . is_enabled :
289290 has_gdn_quantized = True
290- if "self_attn.q_proj" in name :
291+ if "self_attn.q_proj" in name and module . weight_quantizer . is_enabled :
291292 has_attn_quantized = True
292293 assert has_gdn_quantized , "GatedDeltaNet linear layers should be quantized"
293294 assert has_attn_quantized , "Attention linear layers should be quantized"
You can’t perform that action at this time.
0 commit comments