|
13 | 13 | from aphrodite.transformers_utils.detokenizer import Detokenizer |
14 | 14 |
|
15 | 15 |
|
| 16 | +def single_step_process_prompt_logprob( |
| 17 | + sg_output_proc: SequenceGroupOutputProcessor, seq_group: SequenceGroup, |
| 18 | + output: SequenceGroupOutput) -> None: |
| 19 | + """Process prompt logprobs associated with the :class:`SequenceGroupOutput` |
| 20 | + for a given step. |
| 21 | + Do nothing if the output has no prompt logprobs. |
| 22 | + Account for the fact that transformers do not compute first-token logprobs. |
| 23 | +
|
| 24 | + Args: |
| 25 | + sg_output_proc: :class:`SequenceGroupOutputProcessor` instance |
| 26 | + seq_group: the output is associated with this :class:`SequenceGroup` |
| 27 | + output: the :class:`SequenceGroupOutput` for a single scheduler step |
| 28 | + """ |
| 29 | + prompt_logprobs = output.prompt_logprobs |
| 30 | + |
| 31 | + # If this is the first (or only) "chunk" of the prefill, we need |
| 32 | + # to prepend None to the list of prompt logprobs. The reason for this |
| 33 | + # is that for N prompt tokens, the Sampler will generate N-1 total |
| 34 | + # prompt logprobs during prefill since the token at idx 0 will not |
| 35 | + # have a logprob associated with it. |
| 36 | + if prompt_logprobs is not None: |
| 37 | + if not seq_group.prompt_logprobs: |
| 38 | + prompt_logprobs = [None] + prompt_logprobs |
| 39 | + seq_group.prompt_logprobs = [] |
| 40 | + |
| 41 | + assert hasattr(sg_output_proc, 'detokenizer') |
| 42 | + if (seq_group.sampling_params.detokenize |
| 43 | + and sg_output_proc.detokenizer): |
| 44 | + sg_output_proc.detokenizer.decode_prompt_logprobs_inplace( |
| 45 | + seq_group, |
| 46 | + prompt_logprobs, |
| 47 | + position_offset=len(seq_group.prompt_logprobs)) |
| 48 | + |
| 49 | + seq_group.prompt_logprobs.extend(prompt_logprobs) |
| 50 | + |
| 51 | + |
16 | 52 | class SingleStepOutputProcessor(SequenceGroupOutputProcessor): |
17 | 53 | """SequenceGroupOutputProcessor which handles "output processing" logic, |
18 | 54 | which happens after the model returns generated token ids and before |
@@ -57,25 +93,16 @@ def process_outputs(self, sequence_group: SequenceGroup, |
57 | 93 |
|
58 | 94 | def process_prompt_logprob(self, seq_group: SequenceGroup, |
59 | 95 | outputs: List[SequenceGroupOutput]) -> None: |
| 96 | + """Process prompt logprobs associated with one step of a |
| 97 | + single-step-scheduled computation. |
| 98 | + |
| 99 | + Args: |
| 100 | + seq_group: the output is associated with this :class:`SequenceGroup` |
| 101 | + outputs: single-element list holding the :class:`SequenceGroupOutput` for one scheduler step |
| 102 | + """ |
60 | 103 | assert len(outputs) == 1, ("Single step should only has 1 output.") |
61 | 104 | output = outputs[0] |
62 | | - prompt_logprobs = output.prompt_logprobs |
63 | | - |
64 | | - # If this is the first (or only) "chunk" of the prefill, we need |
65 | | - # to prepend None to the list of prompt logprobs. The reason for this |
66 | | - # is that for N prompt tokens, the Sampler will generate N-1 total |
67 | | - # prompt logprobs during prefill since the token at idx 0 will not |
68 | | - # have a logprob associated with it. |
69 | | - if prompt_logprobs is not None: |
70 | | - if not seq_group.prompt_logprobs: |
71 | | - prompt_logprobs = [None] + prompt_logprobs |
72 | | - seq_group.prompt_logprobs = [] |
73 | | - if seq_group.sampling_params.detokenize and self.detokenizer: |
74 | | - self.detokenizer.decode_prompt_logprobs_inplace( |
75 | | - seq_group, |
76 | | - prompt_logprobs, |
77 | | - position_offset=len(seq_group.prompt_logprobs)) |
78 | | - seq_group.prompt_logprobs.extend(prompt_logprobs) |
| 105 | + single_step_process_prompt_logprob(self, seq_group, output) |
79 | 106 |
|
80 | 107 | def _process_sequence_group_outputs(self, seq_group: SequenceGroup, |
81 | 108 | outputs: SequenceGroupOutput, |
|
0 commit comments