@@ -900,6 +900,26 @@ def forward(self, *args, **kwargs):
900900 self ._down_proj_linear = False
901901 return super ().forward (* args , ** kwargs )
902902
def iter_weights_for_calibration(self):
    """Generate ``(weight_slice, quantizer)`` pairs, one per expert per weight.

    Per the upstream note on this override: the base implementation looks up
    singular ``*_weight_quantizer`` names through ``quantizer_attr_names``,
    whereas fused experts keep their per-expert quantizers in ``nn.ModuleList``
    attributes (``gate_up_proj_weight_quantizers`` and
    ``down_proj_weight_quantizers``). This override therefore walks those
    lists itself and pairs each quantizer with the slice of the fused weight
    at the same leading index.

    A weight type is skipped entirely when either the weight attribute or its
    quantizer-list attribute is absent or ``None``. All ``gate_up_proj`` pairs
    are produced before any ``down_proj`` pairs.
    """
    # (fused weight attribute, per-expert quantizer list attribute)
    attr_pairs = (
        ("gate_up_proj", "gate_up_proj_weight_quantizers"),
        ("down_proj", "down_proj_weight_quantizers"),
    )
    for weight_attr, quantizer_list_attr in attr_pairs:
        fused_weight = getattr(self, weight_attr, None)
        expert_quantizers = getattr(self, quantizer_list_attr, None)
        if fused_weight is not None and expert_quantizers is not None:
            # The position in the quantizer list doubles as the expert index
            # into the first dimension of the fused weight.
            for expert_idx, quantizer in enumerate(expert_quantizers):
                yield fused_weight[expert_idx], quantizer
922+
903923 def fold_weight (self , keep_attrs : bool = False ):
904924 """Fold per-expert weight quantizers into the fused 3-D weights.
905925
0 commit comments