diff --git a/example_notebooks/vllm/vllm_serve.ipynb b/example_notebooks/vllm/vllm_serve.ipynb new file mode 100644 index 0000000..4c21c5e --- /dev/null +++ b/example_notebooks/vllm/vllm_serve.ipynb @@ -0,0 +1,176 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "59f98cf9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/aerdem/projects/nvidia/logits-processor-zoo\n" + ] + } + ], + "source": [ + "%cd ../.." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f2a86616", + "metadata": {}, + "outputs": [], + "source": [ + "# Run vllm serve like this:\n", + "# vllm serve Qwen/Qwen2.5-1.5B-Instruct --dtype auto --api-key lpz-test --logits-processor-pattern \"logits_processor_zoo.vllm\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "13f407ff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fried rice chicken is a popular Chinese dish that combines the flavors of fried rice with the tender texture and juicy meat of chicken. Here's a basic recipe to help you make it at home:\n", + "\n", + "### Ingredients:\n", + "- 1 pound boneless skinless chicken breast or thighs (cut into bite-sized pieces)\n", + "- 2 tablespoons vegetable oil\n", + "- 3 cloves garlic, minced\n", + "- 1 tablespoon ginger, grated\n", + "- 1/4 cup soy sauce\n", + "- 1/4 cup oyster sauce\n", + "- 1 teaspoon sugar\n", + "- 1/2 teaspoon salt\n", + "- 1/4 teaspoon black pepper\n", + "- 1 can (8 oz) condensed cream of mushroom soup\n", + "- 1 cup frozen mixed vegetables (such as peas, carrots, corn)\n", + "- 1/2 cup chopped green onions\n", + "- 1/4 cup chopped cilantro\n", + "\n", + "### Instructions:\n", + "\n", + "#### Step 1: Prepare the Chicken\n", + "1. **Marinate the Chicken:** In a bowl, mix together the chicken, soy sauce, oyster sauce, sugar, salt, and black pepper.\n", + "2. **Cook the Chicken:** Heat the vegetable oil in a large skillet over medium-high heat. Add the marinated chicken and cook until browned on all sides, about 5 minutes per side. Remove from the pan and set aside.\n", + "\n", + "#### Step 2: Cook the Vegetables\n", + "1. **Sauté the Vegetables:** In the same skillet, add the remaining 1 tablespoon of oil. Sauté the minced garlic and grated ginger for about 30 seconds until fragrant.\n", + "2. **Add the Mixed Vegetables:** Stir in the frozen mixed vegetables and sauté until they start to soften, about 2-3 minutes.\n", + "3. **Combine Everything:** Return the cooked chicken to the skillet along with the sautéed vegetables. Pour in the condensed cream of mushroom soup and stir well to combine everything.\n", + "\n", + "#### Step 3: Finish Cooking\n", + "1. **Simmer the Sauce:** Bring the mixture to a simmer over low heat. Let it cook for about 5 minutes, stirring occasionally, until the sauce thickens slightly.\n", + "2. **Serve:** Garnish with chopped green onions and cilantro before serving. This dish can be served hot or cold depending on your preference.\n", + "\n", + "Enjoy your homemade fried rice chicken! Adjust the seasoning according to your taste preferences.\n" + ] + } + ], + "source": [ + "from openai import OpenAI\n", + "\n", + "model_name = \"Qwen/Qwen2.5-1.5B-Instruct\"\n", + "\n", + "client = OpenAI(\n", + " base_url=\"http://localhost:8000/v1\",\n", + " api_key=\"lpz-test\",\n", + ")\n", + "\n", + "completion = client.chat.completions.create(\n", + " model=model_name,\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Can you explain how fried rice chicken is cooked?\"}\n", + " ], \n", + " temperature=0,\n", + " top_p=1\n", + ")\n", + "\n", + "print(completion.choices[0].message.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6227231c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fried rice chicken is a popular Chinese dish that combines the flavors of fried rice with the tender texture and juicy meat of chicken. Here's a basic recipe to help you make it at home:\n", + "\n", + "### Ingredients:\n", + "- 1 pound boneless skinless chicken breast or thighs (cut into bite-sized pieces)\n", + "- 2 tablespoons vegetable oil\n", + "- 3 cloves garlic, minced\n", + "- 1 tablespoon ginger, grated\n", + "- 1/4 cup soy sauce\n", + "- 1/4 cup oyster sauce\n", + "- 1 teaspoon sugar\n", + "- 1/2 teaspoon salt\n", + "- 1/4 teaspoon black pepper\n", + "- 1 can (8 oz) condensed cream of mushroom soup\n", + "\n" + ] + } + ], + "source": [ + "completion = client.chat.completions.create(\n", + " model=model_name,\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Can you explain how fried rice chicken is cooked?\"}\n", + " ],\n", + " temperature=0,\n", + " top_p=1,\n", + " extra_body={\n", + " \"logits_processors\": [{\n", + " \"qualname\": \"logits_processor_zoo.vllm.GenLengthLogitsProcessor\",\n", + " \"kwargs\": {\"tokenizer\": model_name, \"boost_factor\": 0.2, \"complete_sentences\": True}\n", + " }]\n", + " }\n", + ")\n", + "\n", + "print(completion.choices[0].message.content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96544ec2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/logits_processor_zoo/vllm/cite_prompt.py b/logits_processor_zoo/vllm/cite_prompt.py index 35a7158..fa5303d 100644 --- a/logits_processor_zoo/vllm/cite_prompt.py +++ b/logits_processor_zoo/vllm/cite_prompt.py @@ -15,9 +15,9 @@ # limitations under the License. # -from typing import List +from typing import List, Union import torch -from transformers import PreTrainedTokenizer +from transformers import PreTrainedTokenizer, AutoTokenizer class CiteFromPromptLogitsProcessor: @@ -33,11 +33,14 @@ class CiteFromPromptLogitsProcessor: boost_eos (bool, optional): If True, boosts EOS token too. conditional_boost_factor (float, optional): A factor to boost the likelihood of the tokens based on previous token. """ - def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float = 1.0, boost_eos: bool = True, + def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], boost_factor: float = 1.0, boost_eos: bool = True, conditional_boost_factor: float = 0.0): self.tokenizer = tokenizer + if isinstance(self.tokenizer, str): + self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer) + self.boost_factor = boost_factor - self.eos_token_id = tokenizer.eos_token_id + self.eos_token_id = self.tokenizer.eos_token_id self.boost_eos = boost_eos self.conditional_boost_factor = conditional_boost_factor diff --git a/logits_processor_zoo/vllm/generation_length.py b/logits_processor_zoo/vllm/generation_length.py index a050128..bc5b37d 100644 --- a/logits_processor_zoo/vllm/generation_length.py +++ b/logits_processor_zoo/vllm/generation_length.py @@ -15,9 +15,9 @@ # limitations under the License. # -from typing import List +from typing import List, Union import torch -from transformers import PreTrainedTokenizer +from transformers import PreTrainedTokenizer, AutoTokenizer from logits_processor_zoo.utils import text_to_token @@ -36,18 +36,22 @@ class GenLengthLogitsProcessor: or a new line. Default is False. boost_token_str (str, optional): A string to be tokenized and used instead of EOS. Especially useful for . """ - def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float, + def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], boost_factor: float, p: int = 2, complete_sentences: bool = False, boost_token_str: str = None): - self.boost_token = tokenizer.eos_token_id + + self.tokenizer = tokenizer + if isinstance(self.tokenizer, str): + self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer) + + self.boost_token = self.tokenizer.eos_token_id self.boost_token_str = boost_token_str if boost_token_str is not None: - self.boost_token = text_to_token(tokenizer, boost_token_str, last=False) + self.boost_token = text_to_token(self.tokenizer, boost_token_str, last=False) self.boost_factor = boost_factor self.p = p - self.full_stop_token = text_to_token(tokenizer, "It is a sentence.", last=True) - self.new_line_token = text_to_token(tokenizer, "It is a new line\n", last=True) + self.full_stop_token = text_to_token(self.tokenizer, "It is a sentence.", last=True) + self.new_line_token = text_to_token(self.tokenizer, "It is a new line\n", last=True) self.complete_sentences = complete_sentences - self.tokenizer = tokenizer def clone(self): return GenLengthLogitsProcessor(self.tokenizer, self.boost_factor, self.p, diff --git a/logits_processor_zoo/vllm/last_phrase.py b/logits_processor_zoo/vllm/last_phrase.py index a6a750f..b1fe874 100644 --- a/logits_processor_zoo/vllm/last_phrase.py +++ b/logits_processor_zoo/vllm/last_phrase.py @@ -15,8 +15,8 @@ # limitations under the License. # -from transformers import PreTrainedTokenizer -from typing import List +from transformers import PreTrainedTokenizer, AutoTokenizer +from typing import List, Union import torch from logits_processor_zoo.utils import enforce_tokens @@ -31,12 +31,15 @@ class ForceLastPhraseLogitsProcessor: phrase (str): The phrase to be generated by LLM before the end of its speech. tokenizer (PreTrainedTokenizer): The tokenizer used by the LLM. """ - def __init__(self, phrase: str, tokenizer: PreTrainedTokenizer): - self.eos_token_id = tokenizer.eos_token_id - self.phrase_tokens = tokenizer.encode(phrase, add_special_tokens=False) + def __init__(self, phrase: str, tokenizer: Union[PreTrainedTokenizer, str]): + self.tokenizer = tokenizer + if isinstance(self.tokenizer, str): + self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer) + + self.eos_token_id = self.tokenizer.eos_token_id + self.phrase_tokens = self.tokenizer.encode(phrase, add_special_tokens=False) self._reset() self.phrase = phrase - self.tokenizer = tokenizer # LogitsProcessor can contain a clone attribute to deep copy it # https://github.com/vllm-project/vllm/blob/19dcc02a72e3ed52e3bf95aae44ea1f40ce42ea0/vllm/sampling_params.py#L537-L550 diff --git a/logits_processor_zoo/vllm/multiple_choice.py b/logits_processor_zoo/vllm/multiple_choice.py index 345b274..dbfb050 100644 --- a/logits_processor_zoo/vllm/multiple_choice.py +++ b/logits_processor_zoo/vllm/multiple_choice.py @@ -15,8 +15,8 @@ # limitations under the License. # -from transformers import PreTrainedTokenizer -from typing import List +from transformers import PreTrainedTokenizer, AutoTokenizer +from typing import List, Union import torch from logits_processor_zoo.utils import text_to_token, get_new_line_tokens, enforce_tokens @@ -41,17 +41,20 @@ class MultipleChoiceLogitsProcessor: boost_first_words (float): Nonzero values add choices' first tokens' logits to boost performance. Especially useful for the models which have difficulty associating the choice with its text. """ - def __init__(self, tokenizer: PreTrainedTokenizer, choices: List[str] = None, + def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], choices: List[str] = None, delimiter: str = ".", boost_first_words: float = 0.0): self.tokenizer = tokenizer + if isinstance(self.tokenizer, str): + self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer) + self.choices = choices self.delimiter = delimiter if choices is None: choices = ["1", "2", "3", "4"] - self.new_line_token = get_new_line_tokens(tokenizer) - self.delimiter_token = text_to_token(tokenizer, delimiter, last=False) - self.choice_tokens = [text_to_token(tokenizer, choice, last=False) for choice in choices] + self.new_line_token = get_new_line_tokens(self.tokenizer) + self.delimiter_token = text_to_token(self.tokenizer, delimiter, last=False) + self.choice_tokens = [text_to_token(self.tokenizer, choice, last=False) for choice in choices] self.boost_first_words = boost_first_words def clone(self): diff --git a/logits_processor_zoo/vllm/trigger_phrase.py b/logits_processor_zoo/vllm/trigger_phrase.py index ccb3bb2..09fa46c 100644 --- a/logits_processor_zoo/vllm/trigger_phrase.py +++ b/logits_processor_zoo/vllm/trigger_phrase.py @@ -15,8 +15,8 @@ # limitations under the License. # -from transformers import PreTrainedTokenizer -from typing import List +from transformers import PreTrainedTokenizer, AutoTokenizer +from typing import List, Union import torch from logits_processor_zoo.utils import text_to_token, enforce_tokens @@ -33,14 +33,17 @@ class TriggerPhraseLogitsProcessor: trigger_count (int): How many times the phrase will be triggered. trigger_after (bool): Whether the phrase is written after the trigger token or instead of the trigger token. """ - def __init__(self, phrase: str, trigger_token_phrase: str, tokenizer: PreTrainedTokenizer, trigger_count: int = 1, - trigger_after: bool = False): + def __init__(self, phrase: str, trigger_token_phrase: str, tokenizer: Union[PreTrainedTokenizer, str], + trigger_count: int = 1, trigger_after: bool = False): + self.tokenizer = tokenizer + if isinstance(self.tokenizer, str): + self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer) + self.phrase = phrase self.trigger_token_phrase = trigger_token_phrase - self.tokenizer = tokenizer self.trigger_count = trigger_count - self.trigger_token = text_to_token(tokenizer, trigger_token_phrase, last=False) - self.phrase_tokens = tokenizer.encode(phrase, add_special_tokens=False) + self.trigger_token = text_to_token(self.tokenizer, trigger_token_phrase, last=False) + self.phrase_tokens = self.tokenizer.encode(phrase, add_special_tokens=False) self.initial_trigger_count = trigger_count self.trigger_after = trigger_after self._reset() diff --git a/pyproject.toml b/pyproject.toml index b41d61c..e102ec5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "logits-processor-zoo" -version = "0.1.8" +version = "0.1.9" description = "A collection of LogitsProcessors to customize and enhance LLM behavior for specific tasks." authors = ["Ahmet Erdem", "Ivan Sorokin", "Maximilian Jeblick", "Darragh Hanley", "David Austin"] readme = "README.md"