Skip to content

Commit ecc143b

Browse files
committed
fix: correct misleading docstrings and rename _apply_guided_bitmask → _prepare_guided_bitmask
- _apply_guided_bitmask: rename to _prepare_guided_bitmask and update the docstring to clarify that it only allocates/fills the bitmask; callers (including Eagle3, which first translates the bitmask to its draft vocabulary) are responsible for actually applying it.
- _accept_guided_tokens: replace 'original grammar matchers' with 'provided grammar matchers', since the matchers are typically forked copies created by SpecModelAgent.
1 parent 6f4b612 commit ecc143b

3 files changed

Lines changed: 13 additions & 7 deletions

File tree

lmdeploy/pytorch/spec_decode/proposers/base.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,14 @@ def __init__(self, specdecode_config: SpecDecodeConfig, device: torch.device = N
6868
# Set by SpecModelAgent after construction
6969
self.guided_decoding_manager = None
7070

71-
async def _apply_guided_bitmask(self, logits: torch.Tensor,
71+
async def _prepare_guided_bitmask(self, logits: torch.Tensor,
7272
guided_processors: dict | None) -> torch.Tensor | None:
73-
"""Apply guided-decoding bitmask to draft logits and return the
74-
allocated bitmask (caller may need it for e.g. d2t translation).
73+
"""Allocate and fill a guided-decoding bitmask for draft logits.
7574
76-
If no guided processors are active, returns None.
75+
Returns the filled bitmask tensor (or None if no guided processors are
76+
active). The caller is responsible for actually applying the bitmask to
77+
logits — some proposers (e.g. Eagle3) may need to translate the bitmask
78+
to their draft vocabulary first.
7779
7880
CPU-bound xgrammar ``fill_bitmap`` calls are offloaded to a thread
7981
so they don't block the asyncio event loop.
@@ -92,7 +94,11 @@ def _fill():
9294

9395
async def _accept_guided_tokens(self, draft_token_ids: torch.Tensor,
9496
guided_processors: dict | None):
95-
"""Accept draft tokens on the original grammar matchers.
97+
"""Accept draft tokens on the provided grammar matchers.
98+
99+
In speculative decoding the matchers are typically forked from the
100+
originals (created in ``SpecModelAgent._async_model_forward``), so this
101+
method accepts on whichever matchers are passed in.
96102
97103
CPU-bound xgrammar ``accept_token`` calls are offloaded to a thread
98104
so they don't block the asyncio event loop.

lmdeploy/pytorch/spec_decode/proposers/deepseek_mtp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ async def get_outputs(self,
3232

3333
logits = self.get_logits(hidden_states)[0]
3434

35-
guided_bitmask = await self._apply_guided_bitmask(logits, guided_processors)
35+
guided_bitmask = await self._prepare_guided_bitmask(logits, guided_processors)
3636
if guided_bitmask is not None:
3737
self.guided_decoding_manager.apply_batched_bitmap(logits, guided_bitmask)
3838

lmdeploy/pytorch/spec_decode/proposers/eagle3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ async def get_outputs(self,
113113

114114
logits = self.get_logits(hidden_states)[0]
115115

116-
guided_bitmask = await self._apply_guided_bitmask(logits, guided_processors)
116+
guided_bitmask = await self._prepare_guided_bitmask(logits, guided_processors)
117117
if guided_bitmask is not None:
118118
draft_bitmask = self._translate_bitmask(guided_bitmask)
119119
self.guided_decoding_manager.apply_batched_bitmap(logits, draft_bitmask)

0 commit comments

Comments
 (0)