Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions logits_processor_zoo/vllm/cite_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,14 @@ class CiteFromPromptLogitsProcessor:
boost_eos (bool, optional): If True, boosts EOS token too.
"""
def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float = 1.0, boost_eos: bool = True):
    # Keep every constructor argument on the instance so that clone()
    # can rebuild an identically-configured processor later.
    self.tokenizer = tokenizer
    self.boost_factor = boost_factor
    self.boost_eos = boost_eos
    # Cached for fast access in __call__ when boosting the EOS logit.
    self.eos_token_id = tokenizer.eos_token_id

def clone(self):
    """Return a fresh, independently-configured copy of this processor."""
    return CiteFromPromptLogitsProcessor(
        tokenizer=self.tokenizer,
        boost_factor=self.boost_factor,
        boost_eos=self.boost_eos,
    )

def __call__(self, prompt_tokens_ids: List[int], past_token_ids: List[int], scores: torch.Tensor) -> torch.Tensor:
tokens = set(prompt_tokens_ids)
if self.boost_eos:
Expand Down
5 changes: 5 additions & 0 deletions logits_processor_zoo/vllm/generation_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float,
self.full_stop_token = text_to_token(tokenizer, "It is a sentence.", last=True)
self.new_line_token = text_to_token(tokenizer, "It is a new line\n", last=True)
self.complete_sentences = complete_sentences
self.tokenizer = tokenizer

def clone(self):
    """Build an unused copy with the same configuration.

    vLLM deep-copies per-request logits processors through this hook, so a
    new instance is created from the stored constructor arguments.
    """
    fresh = GenLengthLogitsProcessor(
        self.tokenizer,
        self.boost_factor,
        self.p,
        self.complete_sentences,
        self.boost_token_str,
    )
    return fresh

def __call__(self, prompt_tokens_ids: List[int], past_token_ids: List[int], scores: torch.Tensor) -> torch.Tensor:
gen_length = len(past_token_ids)
Expand Down
7 changes: 7 additions & 0 deletions logits_processor_zoo/vllm/last_phrase.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ def __init__(self, phrase: str, tokenizer: PreTrainedTokenizer):
self.eos_token_id = tokenizer.eos_token_id
self.phrase_tokens = tokenizer.encode(phrase, add_special_tokens=False)
self._reset()
self.phrase = phrase
self.tokenizer = tokenizer

# vLLM deep-copies per-request logits processors via an optional ``clone``
# attribute; see:
# https://github.com/vllm-project/vllm/blob/19dcc02a72e3ed52e3bf95aae44ea1f40ce42ea0/vllm/sampling_params.py#L537-L550
def clone(self):
    """Return a fresh processor with the same phrase and tokenizer."""
    fresh = ForceLastPhraseLogitsProcessor(self.phrase, self.tokenizer)
    return fresh

def _reset(self):
self.index = 0
Expand Down
6 changes: 6 additions & 0 deletions logits_processor_zoo/vllm/multiple_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class MultipleChoiceLogitsProcessor:
"""
def __init__(self, tokenizer: PreTrainedTokenizer, choices: List[str] = None,
delimiter: str = ".", boost_first_words: float = 0.0):
self.tokenizer = tokenizer
self.choices = choices
self.delimiter = delimiter
if choices is None:
choices = ["1", "2", "3", "4"]

Expand All @@ -52,6 +55,9 @@ def __init__(self, tokenizer: PreTrainedTokenizer, choices: List[str] = None,
self.boost_first_words = boost_first_words
self.very_large_number = 999

def clone(self):
    """Return an independent copy configured identically to this one.

    Note: ``self.choices`` may be ``None`` (the stored pre-default value);
    the new instance then re-applies the same defaults, so behavior matches.
    """
    return MultipleChoiceLogitsProcessor(
        tokenizer=self.tokenizer,
        choices=self.choices,
        delimiter=self.delimiter,
        boost_first_words=self.boost_first_words,
    )

def __call__(self, prompt_tokens_ids: List[int], past_token_ids: List[int], scores: torch.Tensor) -> torch.Tensor:

if self.boost_first_words:
Expand Down
8 changes: 8 additions & 0 deletions logits_processor_zoo/vllm/trigger_phrase.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,21 @@ class TriggerPhraseLogitsProcessor:
"""
def __init__(self, phrase: str, trigger_token_phrase: str, tokenizer: PreTrainedTokenizer, trigger_count: int = 1,
trigger_after: bool = False):
self.phrase = phrase
self.trigger_token_phrase = trigger_token_phrase
self.tokenizer = tokenizer
self.trigger_count = trigger_count
Comment thread
aerdem4 marked this conversation as resolved.
self.trigger_token = text_to_token(tokenizer, trigger_token_phrase, last=False)
self.phrase_tokens = tokenizer.encode(phrase, add_special_tokens=False)
self.initial_trigger_count = trigger_count
self.trigger_after = trigger_after
self.very_large_number = 999
self._reset()

def clone(self):
    """Return an unused copy of this processor.

    The trigger budget is restored from the initially requested count
    (``initial_trigger_count``) rather than the possibly-consumed current
    ``trigger_count``, so the copy starts from a clean state.
    """
    return TriggerPhraseLogitsProcessor(
        phrase=self.phrase,
        trigger_token_phrase=self.trigger_token_phrase,
        tokenizer=self.tokenizer,
        trigger_count=self.initial_trigger_count,
        trigger_after=self.trigger_after,
    )

def _reset(self):
self.index = -1
self.trigger_count = self.initial_trigger_count
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "logits-processor-zoo"
version = "0.1.4"
version = "0.1.5"
description = "A collection of LogitsProcessors to customize and enhance LLM behavior for specific tasks."
authors = ["Ahmet Erdem", "Ivan Sorokin", "Maximilian Jeblick", "Darragh Hanley", "David Austin"]
readme = "README.md"
Expand Down