Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 176 additions & 0 deletions example_notebooks/vllm/vllm_serve.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "59f98cf9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/aerdem/projects/nvidia/logits-processor-zoo\n"
]
}
],
"source": [
"%cd ../.."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f2a86616",
"metadata": {},
"outputs": [],
"source": [
"# Run vllm serve like this:\n",
"# vllm serve Qwen/Qwen2.5-1.5B-Instruct --dtype auto --api-key lpz-test --logits-processor-pattern \"logits_processor_zoo.vllm\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "13f407ff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fried rice chicken is a popular Chinese dish that combines the flavors of fried rice with the tender texture and juicy meat of chicken. Here's a basic recipe to help you make it at home:\n",
"\n",
"### Ingredients:\n",
"- 1 pound boneless skinless chicken breast or thighs (cut into bite-sized pieces)\n",
"- 2 tablespoons vegetable oil\n",
"- 3 cloves garlic, minced\n",
"- 1 tablespoon ginger, grated\n",
"- 1/4 cup soy sauce\n",
"- 1/4 cup oyster sauce\n",
"- 1 teaspoon sugar\n",
"- 1/2 teaspoon salt\n",
"- 1/4 teaspoon black pepper\n",
"- 1 can (8 oz) condensed cream of mushroom soup\n",
"- 1 cup frozen mixed vegetables (such as peas, carrots, corn)\n",
"- 1/2 cup chopped green onions\n",
"- 1/4 cup chopped cilantro\n",
"\n",
"### Instructions:\n",
"\n",
"#### Step 1: Prepare the Chicken\n",
"1. **Marinate the Chicken:** In a bowl, mix together the chicken, soy sauce, oyster sauce, sugar, salt, and black pepper.\n",
"2. **Cook the Chicken:** Heat the vegetable oil in a large skillet over medium-high heat. Add the marinated chicken and cook until browned on all sides, about 5 minutes per side. Remove from the pan and set aside.\n",
"\n",
"#### Step 2: Cook the Vegetables\n",
"1. **Sauté the Vegetables:** In the same skillet, add the remaining 1 tablespoon of oil. Sauté the minced garlic and grated ginger for about 30 seconds until fragrant.\n",
"2. **Add the Mixed Vegetables:** Stir in the frozen mixed vegetables and sauté until they start to soften, about 2-3 minutes.\n",
"3. **Combine Everything:** Return the cooked chicken to the skillet along with the sautéed vegetables. Pour in the condensed cream of mushroom soup and stir well to combine everything.\n",
"\n",
"#### Step 3: Finish Cooking\n",
"1. **Simmer the Sauce:** Bring the mixture to a simmer over low heat. Let it cook for about 5 minutes, stirring occasionally, until the sauce thickens slightly.\n",
"2. **Serve:** Garnish with chopped green onions and cilantro before serving. This dish can be served hot or cold depending on your preference.\n",
"\n",
"Enjoy your homemade fried rice chicken! Adjust the seasoning according to your taste preferences.\n"
]
}
],
"source": [
"from openai import OpenAI\n",
"\n",
"model_name = \"Qwen/Qwen2.5-1.5B-Instruct\"\n",
"\n",
"client = OpenAI(\n",
" base_url=\"http://localhost:8000/v1\",\n",
" api_key=\"lpz-test\",\n",
")\n",
"\n",
"completion = client.chat.completions.create(\n",
" model=model_name,\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"Can you explain how fried rice chicken is cooked?\"}\n",
" ], \n",
" temperature=0,\n",
" top_p=1\n",
")\n",
"\n",
"print(completion.choices[0].message.content)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6227231c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fried rice chicken is a popular Chinese dish that combines the flavors of fried rice with the tender texture and juicy meat of chicken. Here's a basic recipe to help you make it at home:\n",
"\n",
"### Ingredients:\n",
"- 1 pound boneless skinless chicken breast or thighs (cut into bite-sized pieces)\n",
"- 2 tablespoons vegetable oil\n",
"- 3 cloves garlic, minced\n",
"- 1 tablespoon ginger, grated\n",
"- 1/4 cup soy sauce\n",
"- 1/4 cup oyster sauce\n",
"- 1 teaspoon sugar\n",
"- 1/2 teaspoon salt\n",
"- 1/4 teaspoon black pepper\n",
"- 1 can (8 oz) condensed cream of mushroom soup\n",
"\n"
]
}
],
"source": [
"completion = client.chat.completions.create(\n",
" model=model_name,\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"Can you explain how fried rice chicken is cooked?\"}\n",
" ],\n",
" temperature=0,\n",
" top_p=1,\n",
" extra_body={\n",
" \"logits_processors\": [{\n",
" \"qualname\": \"logits_processor_zoo.vllm.GenLengthLogitsProcessor\",\n",
" \"kwargs\": {\"tokenizer\": model_name, \"boost_factor\": 0.2, \"complete_sentences\": True}\n",
" }]\n",
" }\n",
")\n",
"\n",
"print(completion.choices[0].message.content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "96544ec2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.17"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
11 changes: 7 additions & 4 deletions logits_processor_zoo/vllm/cite_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
# limitations under the License.
#

from typing import List
from typing import List, Union
import torch
from transformers import PreTrainedTokenizer
from transformers import PreTrainedTokenizer, AutoTokenizer


class CiteFromPromptLogitsProcessor:
Expand All @@ -33,11 +33,14 @@ class CiteFromPromptLogitsProcessor:
boost_eos (bool, optional): If True, boosts EOS token too.
conditional_boost_factor (float, optional): A factor to boost the likelihood of tokens based on the previous token.
"""
def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float = 1.0, boost_eos: bool = True,
def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], boost_factor: float = 1.0, boost_eos: bool = True,
conditional_boost_factor: float = 0.0):
self.tokenizer = tokenizer
if isinstance(self.tokenizer, str):
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer)

self.boost_factor = boost_factor
self.eos_token_id = tokenizer.eos_token_id
self.eos_token_id = self.tokenizer.eos_token_id
self.boost_eos = boost_eos
self.conditional_boost_factor = conditional_boost_factor

Expand Down
20 changes: 12 additions & 8 deletions logits_processor_zoo/vllm/generation_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
# limitations under the License.
#

from typing import List
from typing import List, Union
import torch
from transformers import PreTrainedTokenizer
from transformers import PreTrainedTokenizer, AutoTokenizer
from logits_processor_zoo.utils import text_to_token


Expand All @@ -36,18 +36,22 @@ class GenLengthLogitsProcessor:
or a new line. Default is False.
boost_token_str (str, optional): A string to be tokenized and used instead of EOS. Especially useful for </think>.
"""
def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float,
def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], boost_factor: float,
p: int = 2, complete_sentences: bool = False, boost_token_str: str = None):
self.boost_token = tokenizer.eos_token_id

self.tokenizer = tokenizer
if isinstance(self.tokenizer, str):
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer)

self.boost_token = self.tokenizer.eos_token_id
self.boost_token_str = boost_token_str
if boost_token_str is not None:
self.boost_token = text_to_token(tokenizer, boost_token_str, last=False)
self.boost_token = text_to_token(self.tokenizer, boost_token_str, last=False)
self.boost_factor = boost_factor
self.p = p
self.full_stop_token = text_to_token(tokenizer, "It is a sentence.", last=True)
self.new_line_token = text_to_token(tokenizer, "It is a new line\n", last=True)
self.full_stop_token = text_to_token(self.tokenizer, "It is a sentence.", last=True)
self.new_line_token = text_to_token(self.tokenizer, "It is a new line\n", last=True)
self.complete_sentences = complete_sentences
self.tokenizer = tokenizer

def clone(self):
return GenLengthLogitsProcessor(self.tokenizer, self.boost_factor, self.p,
Expand Down
15 changes: 9 additions & 6 deletions logits_processor_zoo/vllm/last_phrase.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
# limitations under the License.
#

from transformers import PreTrainedTokenizer
from typing import List
from transformers import PreTrainedTokenizer, AutoTokenizer
from typing import List, Union
import torch
from logits_processor_zoo.utils import enforce_tokens

Expand All @@ -31,12 +31,15 @@ class ForceLastPhraseLogitsProcessor:
phrase (str): The phrase to be generated by LLM before the end of its speech.
tokenizer (PreTrainedTokenizer): The tokenizer used by the LLM.
"""
def __init__(self, phrase: str, tokenizer: PreTrainedTokenizer):
self.eos_token_id = tokenizer.eos_token_id
self.phrase_tokens = tokenizer.encode(phrase, add_special_tokens=False)
def __init__(self, phrase: str, tokenizer: Union[PreTrainedTokenizer, str]):
self.tokenizer = tokenizer
if isinstance(self.tokenizer, str):
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer)

self.eos_token_id = self.tokenizer.eos_token_id
self.phrase_tokens = self.tokenizer.encode(phrase, add_special_tokens=False)
self._reset()
self.phrase = phrase
self.tokenizer = tokenizer

# LogitsProcessor can contain a clone attribute to deep copy it
# https://github.com/vllm-project/vllm/blob/19dcc02a72e3ed52e3bf95aae44ea1f40ce42ea0/vllm/sampling_params.py#L537-L550
Expand Down
15 changes: 9 additions & 6 deletions logits_processor_zoo/vllm/multiple_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
# limitations under the License.
#

from transformers import PreTrainedTokenizer
from typing import List
from transformers import PreTrainedTokenizer, AutoTokenizer
from typing import List, Union
import torch
from logits_processor_zoo.utils import text_to_token, get_new_line_tokens, enforce_tokens

Expand All @@ -41,17 +41,20 @@ class MultipleChoiceLogitsProcessor:
boost_first_words (float): Nonzero values add choices' first tokens' logits to boost performance.
Especially useful for models that have difficulty associating a choice with its text.
"""
def __init__(self, tokenizer: PreTrainedTokenizer, choices: List[str] = None,
def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], choices: List[str] = None,
delimiter: str = ".", boost_first_words: float = 0.0):
self.tokenizer = tokenizer
if isinstance(self.tokenizer, str):
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer)

self.choices = choices
self.delimiter = delimiter
if choices is None:
choices = ["1", "2", "3", "4"]

self.new_line_token = get_new_line_tokens(tokenizer)
self.delimiter_token = text_to_token(tokenizer, delimiter, last=False)
self.choice_tokens = [text_to_token(tokenizer, choice, last=False) for choice in choices]
self.new_line_token = get_new_line_tokens(self.tokenizer)
self.delimiter_token = text_to_token(self.tokenizer, delimiter, last=False)
self.choice_tokens = [text_to_token(self.tokenizer, choice, last=False) for choice in choices]
self.boost_first_words = boost_first_words

def clone(self):
Expand Down
17 changes: 10 additions & 7 deletions logits_processor_zoo/vllm/trigger_phrase.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
# limitations under the License.
#

from transformers import PreTrainedTokenizer
from typing import List
from transformers import PreTrainedTokenizer, AutoTokenizer
from typing import List, Union
import torch
from logits_processor_zoo.utils import text_to_token, enforce_tokens

Expand All @@ -33,14 +33,17 @@ class TriggerPhraseLogitsProcessor:
trigger_count (int): How many times the phrase will be triggered.
trigger_after (bool): Whether the phrase is written after the trigger token or instead of the trigger token.
"""
def __init__(self, phrase: str, trigger_token_phrase: str, tokenizer: PreTrainedTokenizer, trigger_count: int = 1,
trigger_after: bool = False):
def __init__(self, phrase: str, trigger_token_phrase: str, tokenizer: Union[PreTrainedTokenizer, str],
trigger_count: int = 1, trigger_after: bool = False):
self.tokenizer = tokenizer
if isinstance(self.tokenizer, str):
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer)

self.phrase = phrase
self.trigger_token_phrase = trigger_token_phrase
self.tokenizer = tokenizer
self.trigger_count = trigger_count
self.trigger_token = text_to_token(tokenizer, trigger_token_phrase, last=False)
self.phrase_tokens = tokenizer.encode(phrase, add_special_tokens=False)
self.trigger_token = text_to_token(self.tokenizer, trigger_token_phrase, last=False)
self.phrase_tokens = self.tokenizer.encode(phrase, add_special_tokens=False)
self.initial_trigger_count = trigger_count
self.trigger_after = trigger_after
self._reset()
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "logits-processor-zoo"
version = "0.1.8"
version = "0.1.9"
description = "A collection of LogitsProcessors to customize and enhance LLM behavior for specific tasks."
authors = ["Ahmet Erdem", "Ivan Sorokin", "Maximilian Jeblick", "Darragh Hanley", "David Austin"]
readme = "README.md"
Expand Down