Memory refinement: release Hessian-pinning error funcs and empty the CUDA cache before export

Fridah-nv · Fridah-nv · commit 7eaec3d4dab6 · 2026-06-05T22:42:04.000Z
Signed-off-by: Fridah-nv <201670829+Fridah-nv@users.noreply.github.com>
diff --git a/modelopt/torch/quantization/model_calib.py b/modelopt/torch/quantization/model_calib.py
@@ -804,6 +804,15 @@ def capture(weight_quantizer, weight, input_tensor):
         error_func_for=lambda q: error_funcs.get(id(q)),
     )
 
+    # Drop the per-block Hessians (pinned by error_func closures), then release the sweep's
+    # cached CUDA blocks so export starts from a less fragmented allocator.
+    error_funcs.clear()
+    for module in name_to_module.values():
+        if isinstance(module, TensorQuantizer) and isinstance(module._calibrator, MseCalibrator):
+            module._calibrator._error_func = None
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+
     if debug:
         model._local_hessian_accumulators = accumulators