@@ -392,8 +392,10 @@ def _invoke_fused_moe_quantized_function(
392392 # In case the weight quantizer isn't folded yet in vllm_serve_fakequant, pass the
393393 # quantized weight to the kernel.
394394 B = self .w13_weight # noqa: N806
395- original_kernel (A , B , C , * args , ** kwargs )
396- self .w13_weight = original_weight
395+ try :
396+ original_kernel (A , B , C , * args , ** kwargs )
397+ finally :
398+ self .w13_weight = original_weight
397399 else :
398400 original_kernel (A , B , C , * args , ** kwargs )
399401 if self .w13_output_quantizer .is_enabled :
@@ -408,8 +410,10 @@ def _invoke_fused_moe_quantized_function(
408410 # In case the weight quantizer isn't folded yet in vllm_serve_fakequant, pass the
409411 # quantized weight to the kernel.
410412 B = self .w2_weight # noqa: N806
411- original_kernel (A , B , C , * args , ** kwargs )
412- self .w2_weight = original_weight
413+ try :
414+ original_kernel (A , B , C , * args , ** kwargs )
415+ finally :
416+ self .w2_weight = original_weight
413417 else :
414418 original_kernel (A , B , C , * args , ** kwargs )
415419 if self .w2_output_quantizer .is_enabled :
0 commit comments