
Commit 482f492

minor
Signed-off-by: Kinjal Patel <kinjalpravin@nvidia.com>
1 parent dd948e5 commit 482f492

1 file changed: 2 additions & 8 deletions


modelopt/torch/quantization/plugins/vllm.py

@@ -387,10 +387,7 @@ def _invoke_fused_moe_quantized_function(
         if self.w13_weight_quantizer.is_enabled:  # pragma: no cover
             original_weight = self.w13_weight
             B = self.w13_weight_quantizer(original_weight)  # noqa: N806
-            try:
-                original_kernel(A, B, C, *args, **kwargs)
-            finally:
-                self.w13_weight = original_weight
+            original_kernel(A, B, C, *args, **kwargs)
         else:
             original_kernel(A, B, C, *args, **kwargs)
         if self.w13_output_quantizer.is_enabled:
@@ -400,10 +397,7 @@ def _invoke_fused_moe_quantized_function(
         if self.w2_weight_quantizer.is_enabled:  # pragma: no cover
             original_weight = self.w2_weight
             B = self.w2_weight_quantizer(original_weight)  # noqa: N806
-            try:
-                original_kernel(A, B, C, *args, **kwargs)
-            finally:
-                self.w2_weight = original_weight
+            original_kernel(A, B, C, *args, **kwargs)
         else:
             original_kernel(A, B, C, *args, **kwargs)
         if self.w2_output_quantizer.is_enabled:
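
For context, the resulting control flow is straightforward: the quantizer produces a quantized tensor B that is passed directly to the kernel, and the module's stored weight (self.w13_weight / self.w2_weight) is never reassigned, so the try/finally restore removed above had nothing left to undo. Below is a minimal, self-contained sketch of that pattern; FakeQuantizer, invoke_kernel, and matmul_kernel are hypothetical stand-ins for illustration, not the actual ModelOpt or vLLM API.

import torch


class FakeQuantizer:
    """Stand-in for a ModelOpt weight quantizer (hypothetical, for illustration)."""

    def __init__(self, enabled: bool = True):
        self.is_enabled = enabled

    def __call__(self, weight: torch.Tensor) -> torch.Tensor:
        # Toy fake-quantization: round to one decimal place. Real quantizers
        # apply the configured scale/clamp/round for the target format.
        return (weight * 10).round() / 10


def invoke_kernel(original_kernel, A, weight, C, quantizer):
    if quantizer.is_enabled:
        # Quantize into a fresh tensor and hand it straight to the kernel.
        # The stored weight is never reassigned, so nothing needs restoring
        # afterwards; this is why the try/finally above could be dropped.
        B = quantizer(weight)
        original_kernel(A, B, C)
    else:
        original_kernel(A, weight, C)


def matmul_kernel(A, B, C):
    # Stand-in for the fused MoE kernel: plain matmul into a preallocated C.
    torch.matmul(A, B, out=C)


A = torch.randn(4, 8)
W = torch.randn(8, 3)
C = torch.empty(4, 3)
invoke_kernel(matmul_kernel, A, W, C, FakeQuantizer())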
