Skip to content

Commit 165e136

Browse files
committed
Move cpu_draft .cpu() into asyncio.to_thread in apply_serial_bitmask
1 parent 0476f7c commit 165e136

1 file changed

Lines changed: 1 addition & 1 deletion

File tree

lmdeploy/pytorch/spec_decode/guided_spec_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ async def apply_serial_bitmask(
         if not processors or self._mgr is None:
             return
         forked = {idx: proc.fork() for idx, proc in processors.items()}
-        cpu_draft = draft_token_ids.cpu()
+        cpu_draft = await asyncio.to_thread(draft_token_ids.cpu)
         batch_size = scores_3d.size(0)
         num_expand = scores_3d.size(1)
         bitmask = self._mgr.allocate_batched_bitmap(batch_size)

0 commit comments

Comments
 (0)