fix: fix OOM (#2285)

yuki-97 · web-flow · commit 043775c8ee68 · 2026-04-17T22:29:44.000-07:00
Signed-off-by: Yuki Huang <yukih@nvidia.com>
diff --git a/nemo_rl/models/generation/vllm/vllm_backend.py b/nemo_rl/models/generation/vllm/vllm_backend.py
@@ -207,6 +207,7 @@ def update_weights_via_ipc_zmq(self) -> bool:
                 ipc_handle, list_keys, used_bytes = payload
                 buffer = rebuild_cuda_tensor_from_ipc(ipc_handle, self.device.index)
 
+                weight = None
                 weights = []
                 offset = 0
                 for key in list_keys:
@@ -258,7 +259,8 @@ def update_weights_via_ipc_zmq(self) -> bool:
                 # copied the data, Python may not garbage collect these view objects immediately.
                 # If sender reuses the buffer before GC runs, old views would read corrupted data.
                 # Explicit del ensures immediate cleanup before sending ACK.
-                del weights, policy_weights, draft_weights, buffer
+                del weight, weights, policy_weights, draft_weights, buffer
+                weight = None
                 weights = None
                 policy_weights = None
                 draft_weights = None