vllm-project · vllm-bot · Feb 17, 2026 · Feb 16, 2026
@@ -336,16 +336,6 @@ def _apply_8bit_weight(
 
             current_index += output_size
 
-            # only update the matmul_states if it is not profile_run
-            if (
-                generation > 0
-                and not self.quant_config.llm_int8_has_fp16_weight
-                and matmul_states[i].CB is not None
-                and matmul_states[i].CxB is not None
-            ):
-                del matmul_states[i].CB
-                qweight[offsets[i] : offsets[i + 1]] = matmul_states[i].CxB
-
         out = out.to(original_type)
 
         if reshape_after_matmul: