Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 222 additions & 0 deletions example_notebooks/vllm/text_retrieval_logits_processor.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "28ed6952",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/aerdem/projects/nvidia/logits-processor-zoo\n"
]
}
],
"source": [
"%cd ../.."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b89279fe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO 05-21 13:05:12 [__init__.py:239] Automatically detected platform cuda.\n",
"WARNING 05-21 13:05:15 [config.py:2972] Casting torch.bfloat16 to torch.float16.\n",
"INFO 05-21 13:05:20 [config.py:717] This model supports multiple tasks: {'embed', 'classify', 'score', 'reward', 'generate'}. Defaulting to 'generate'.\n",
"WARNING 05-21 13:05:20 [cuda.py:93] To see benefits of async output processing, enable CUDA graph. Since, enforce-eager is enabled, async output processor cannot be used\n",
"INFO 05-21 13:05:20 [llm_engine.py:240] Initializing a V0 LLM engine (v0.8.5.post1) with config: model='Qwen/Qwen2.5-1.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-1.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=Qwen/Qwen2.5-1.5B-Instruct, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=None, chunked_prefill_enabled=False, use_async_output_proc=False, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={\"splitting_ops\":[],\"compile_sizes\":[],\"cudagraph_capture_sizes\":[],\"max_capture_size\":0}, use_cached_outputs=False, \n",
"INFO 05-21 13:05:22 [cuda.py:292] Using Flash Attention backend.\n",
"INFO 05-21 13:05:22 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0\n",
"INFO 05-21 13:05:22 [model_runner.py:1108] Starting to load model Qwen/Qwen2.5-1.5B-Instruct...\n",
"INFO 05-21 13:05:23 [weight_utils.py:265] Using model weights format ['*.safetensors']\n",
"INFO 05-21 13:05:23 [weight_utils.py:315] No model.safetensors.index.json found in remote.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a62d6b15778b4da397cb4f540673f035",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading safetensors checkpoint shards: 0% Completed | 0/1 [00:00<?, ?it/s]\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO 05-21 13:05:24 [loader.py:458] Loading weights took 0.61 seconds\n",
"INFO 05-21 13:05:24 [model_runner.py:1140] Model loading took 2.8876 GiB and 1.669984 seconds\n",
"INFO 05-21 13:05:26 [worker.py:287] Memory profiling takes 1.72 seconds\n",
"INFO 05-21 13:05:26 [worker.py:287] the current vLLM instance can use total_gpu_memory (23.66GiB) x gpu_memory_utilization (0.90) = 21.29GiB\n",
"INFO 05-21 13:05:26 [worker.py:287] model weights take 2.89GiB; non_torch_memory takes 0.05GiB; PyTorch activation peak memory takes 2.02GiB; the rest of the memory reserved for KV Cache is 16.33GiB.\n",
"INFO 05-21 13:05:26 [executor_base.py:112] # cuda blocks: 38225, # CPU blocks: 9362\n",
"INFO 05-21 13:05:26 [executor_base.py:117] Maximum concurrency for 32768 tokens per request: 18.66x\n",
"INFO 05-21 13:05:27 [llm_engine.py:437] init engine (profile, create kv cache, warmup model) took 3.25 seconds\n"
]
}
],
"source": [
"from example_notebooks.vllm.utils import vLLMRunner\n",
"from logits_processor_zoo.vllm import TextRetrievalLogitsProcessor\n",
"\n",
"runner = vLLMRunner()"
]
},
{
"cell_type": "markdown",
"id": "859aef8d",
"metadata": {},
"source": [
"## Default Response"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "cbf4c2d5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Prompt: \n",
"Ghost of Tsushima:\n",
"\n",
"Knowing that another frontal attack would only result in more losses, Jin infiltrates the keep to sneak poison into the Mongols' airag and kill Ryuzo. However, he again misses the Khan, who has retreated to a stronghold in the north. Despite the castle being taken without the samurai suffering any further losses, Shimura is furious with Jin, as his actions have violated the samurai code of honor. Jin retorts by criticizing the ineffectiveness of the code against the dishonorable Mongols. Knowing that the Shogun will have Jin executed for treason, Shimura urges him to use Yuna as a scapegoat, but Jin refuses and embraces his persona as \"The Ghost.\" Shimura regretfully arrests Jin and burns the decree of adopting Jin as his son. Jin's allies remain loyal to him and help him escape captivity, but his horse is fatally shot by archers in the process. Jin travels north and learns that the Mongols have learned how to craft his poison, which they intend to use in their assault on the Japanese mainland. Before gathering his allies, claiming a new horse, and assaulting the Khan's final stronghold in Port Izumi, Jin leaves a note for Shimura in his castle asking him to join the effort with the samurai, which he does. With the bulk of the Mongol forces distracted, Jin infiltrates the port and kills the Khan on his flagship.\n",
"\n",
"With Khotun Khan dead, the Mongol invasion loses momentum, and the tide turns in favor of the samurai. Shimura informs Jin that the Shogun considers him a threat to the island's stability and status quo of obedience of the people to their leaders. He states that the Shogun has disbanded Clan Sakai and ordered Shimura to kill Jin. Reminiscing about what they have both lost, Jin and Shimura reluctantly duel each other, with Jin emerging victorious. Jin can kill Shimura to give him a proper warrior's death or abandon the samurai code completely and spare his life. Regardless, Jin becomes the enemy of the Shogun but continues protecting Tsushima.\n",
"\n",
"Where did Jin kill Khotun Khan?\n",
"\n",
"Jin killed Khotun Khan on his flagship.\n",
"-----END-----\n",
"\n",
"Prompt: \n",
"Ghost of Tsushima:\n",
"\n",
"Knowing that another frontal attack would only result in more losses, Jin infiltrates the keep to sneak poison into the Mongols' airag and kill Ryuzo. However, he again misses the Khan, who has retreated to a stronghold in the north. Despite the castle being taken without the samurai suffering any further losses, Shimura is furious with Jin, as his actions have violated the samurai code of honor. Jin retorts by criticizing the ineffectiveness of the code against the dishonorable Mongols. Knowing that the Shogun will have Jin executed for treason, Shimura urges him to use Yuna as a scapegoat, but Jin refuses and embraces his persona as \"The Ghost.\" Shimura regretfully arrests Jin and burns the decree of adopting Jin as his son. Jin's allies remain loyal to him and help him escape captivity, but his horse is fatally shot by archers in the process. Jin travels north and learns that the Mongols have learned how to craft his poison, which they intend to use in their assault on the Japanese mainland. Before gathering his allies, claiming a new horse, and assaulting the Khan's final stronghold in Port Izumi, Jin leaves a note for Shimura in his castle asking him to join the effort with the samurai, which he does. With the bulk of the Mongol forces distracted, Jin infiltrates the port and kills the Khan on his flagship.\n",
"\n",
"With Khotun Khan dead, the Mongol invasion loses momentum, and the tide turns in favor of the samurai. Shimura informs Jin that the Shogun considers him a threat to the island's stability and status quo of obedience of the people to their leaders. He states that the Shogun has disbanded Clan Sakai and ordered Shimura to kill Jin. Reminiscing about what they have both lost, Jin and Shimura reluctantly duel each other, with Jin emerging victorious. Jin can kill Shimura to give him a proper warrior's death or abandon the samurai code completely and spare his life. Regardless, Jin becomes the enemy of the Shogun but continues protecting Tsushima.\n",
"\n",
"Who disbanded Clan Sakai?\n",
"\n",
"According to the information provided in the Ghost of Tsushima game, the Shogun disbanded Clan Sakai.\n",
"-----END-----\n",
"\n"
]
}
],
"source": [
"story = \"\"\"\n",
"Knowing that another frontal attack would only result in more losses, Jin infiltrates the keep to sneak poison into the Mongols' airag and kill Ryuzo. However, he again misses the Khan, who has retreated to a stronghold in the north. Despite the castle being taken without the samurai suffering any further losses, Shimura is furious with Jin, as his actions have violated the samurai code of honor. Jin retorts by criticizing the ineffectiveness of the code against the dishonorable Mongols. Knowing that the Shogun will have Jin executed for treason, Shimura urges him to use Yuna as a scapegoat, but Jin refuses and embraces his persona as \"The Ghost.\" Shimura regretfully arrests Jin and burns the decree of adopting Jin as his son. Jin's allies remain loyal to him and help him escape captivity, but his horse is fatally shot by archers in the process. Jin travels north and learns that the Mongols have learned how to craft his poison, which they intend to use in their assault on the Japanese mainland. Before gathering his allies, claiming a new horse, and assaulting the Khan's final stronghold in Port Izumi, Jin leaves a note for Shimura in his castle asking him to join the effort with the samurai, which he does. With the bulk of the Mongol forces distracted, Jin infiltrates the port and kills the Khan on his flagship.\n",
"\n",
"With Khotun Khan dead, the Mongol invasion loses momentum, and the tide turns in favor of the samurai. Shimura informs Jin that the Shogun considers him a threat to the island's stability and status quo of obedience of the people to their leaders. He states that the Shogun has disbanded Clan Sakai and ordered Shimura to kill Jin. Reminiscing about what they have both lost, Jin and Shimura reluctantly duel each other, with Jin emerging victorious. Jin can kill Shimura to give him a proper warrior's death or abandon the samurai code completely and spare his life. Regardless, Jin becomes the enemy of the Shogun but continues protecting Tsushima.\n",
"\"\"\"\n",
"\n",
"example_prompts =[\n",
"f\"\"\"\n",
"Ghost of Tsushima:\n",
"{story}\n",
"Where did Jin kill Khotun Khan?\n",
"\"\"\",\n",
" \n",
"f\"\"\"\n",
"Ghost of Tsushima:\n",
"{story}\n",
"Who disbanded Clan Sakai?\n",
"\"\"\"\n",
"]\n",
"\n",
"runner.generate_response(example_prompts)"
]
},
{
"cell_type": "markdown",
"id": "88bc2f8a",
"metadata": {},
"source": [
"## Retrieve passage from text"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7d74eb26",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Prompt: \n",
"Ghost of Tsushima:\n",
"\n",
"Knowing that another frontal attack would only result in more losses, Jin infiltrates the keep to sneak poison into the Mongols' airag and kill Ryuzo. However, he again misses the Khan, who has retreated to a stronghold in the north. Despite the castle being taken without the samurai suffering any further losses, Shimura is furious with Jin, as his actions have violated the samurai code of honor. Jin retorts by criticizing the ineffectiveness of the code against the dishonorable Mongols. Knowing that the Shogun will have Jin executed for treason, Shimura urges him to use Yuna as a scapegoat, but Jin refuses and embraces his persona as \"The Ghost.\" Shimura regretfully arrests Jin and burns the decree of adopting Jin as his son. Jin's allies remain loyal to him and help him escape captivity, but his horse is fatally shot by archers in the process. Jin travels north and learns that the Mongols have learned how to craft his poison, which they intend to use in their assault on the Japanese mainland. Before gathering his allies, claiming a new horse, and assaulting the Khan's final stronghold in Port Izumi, Jin leaves a note for Shimura in his castle asking him to join the effort with the samurai, which he does. With the bulk of the Mongol forces distracted, Jin infiltrates the port and kills the Khan on his flagship.\n",
"\n",
"With Khotun Khan dead, the Mongol invasion loses momentum, and the tide turns in favor of the samurai. Shimura informs Jin that the Shogun considers him a threat to the island's stability and status quo of obedience of the people to their leaders. He states that the Shogun has disbanded Clan Sakai and ordered Shimura to kill Jin. Reminiscing about what they have both lost, Jin and Shimura reluctantly duel each other, with Jin emerging victorious. Jin can kill Shimura to give him a proper warrior's death or abandon the samurai code completely and spare his life. Regardless, Jin becomes the enemy of the Shogun but continues protecting Tsushima.\n",
"\n",
"Where did Jin kill Khotun Khan?\n",
"\n",
" Jin infiltrates the port and kills the Khan on his flagship.\n",
"\n",
"With Khotun Khan dead, the Mongol invasion loses momentum, and the tide turns in favor of the samurai.\n",
"-----END-----\n",
"\n",
"Prompt: \n",
"Ghost of Tsushima:\n",
"\n",
"Knowing that another frontal attack would only result in more losses, Jin infiltrates the keep to sneak poison into the Mongols' airag and kill Ryuzo. However, he again misses the Khan, who has retreated to a stronghold in the north. Despite the castle being taken without the samurai suffering any further losses, Shimura is furious with Jin, as his actions have violated the samurai code of honor. Jin retorts by criticizing the ineffectiveness of the code against the dishonorable Mongols. Knowing that the Shogun will have Jin executed for treason, Shimura urges him to use Yuna as a scapegoat, but Jin refuses and embraces his persona as \"The Ghost.\" Shimura regretfully arrests Jin and burns the decree of adopting Jin as his son. Jin's allies remain loyal to him and help him escape captivity, but his horse is fatally shot by archers in the process. Jin travels north and learns that the Mongols have learned how to craft his poison, which they intend to use in their assault on the Japanese mainland. Before gathering his allies, claiming a new horse, and assaulting the Khan's final stronghold in Port Izumi, Jin leaves a note for Shimura in his castle asking him to join the effort with the samurai, which he does. With the bulk of the Mongol forces distracted, Jin infiltrates the port and kills the Khan on his flagship.\n",
"\n",
"With Khotun Khan dead, the Mongol invasion loses momentum, and the tide turns in favor of the samurai. Shimura informs Jin that the Shogun considers him a threat to the island's stability and status quo of obedience of the people to their leaders. He states that the Shogun has disbanded Clan Sakai and ordered Shimura to kill Jin. Reminiscing about what they have both lost, Jin and Shimura reluctantly duel each other, with Jin emerging victorious. Jin can kill Shimura to give him a proper warrior's death or abandon the samurai code completely and spare his life. Regardless, Jin becomes the enemy of the Shogun but continues protecting Tsushima.\n",
"\n",
"Who disbanded Clan Sakai?\n",
"\n",
" Shimura informs Jin that the Shogun considers him a threat to the island's stability and status quo of obedience of the people to their leaders. He states that the Shogun has disbanded Clan Sakai and ordered Shimura to kill Jin.\n",
"-----END-----\n",
"\n"
]
}
],
"source": [
"runner.generate_response(example_prompts,\n",
" [TextRetrievalLogitsProcessor(runner.tokenizer, doc=story, split_by_line=False)])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.17"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
3 changes: 2 additions & 1 deletion logits_processor_zoo/vllm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .last_phrase import ForceLastPhraseLogitsProcessor
from .multiple_choice import MultipleChoiceLogitsProcessor
from .trigger_phrase import TriggerPhraseLogitsProcessor
from .test_retrieval import TextRetrievalLogitsProcessor

# Names re-exported by `from logits_processor_zoo.vllm import *`.
# Each entry corresponds to one of the processor classes imported above.
__all__ = ['GenLengthLogitsProcessor', 'CiteFromPromptLogitsProcessor', 'ForceLastPhraseLogitsProcessor',
           'MultipleChoiceLogitsProcessor', 'TriggerPhraseLogitsProcessor', 'TextRetrievalLogitsProcessor']
47 changes: 47 additions & 0 deletions logits_processor_zoo/vllm/test_retrieval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from typing import List
import torch
from transformers import PreTrainedTokenizer
from logits_processor_zoo.utils import text_to_token, enforce_tokens


class TextRetrievalLogitsProcessor:
    """
    Logits processor that restricts generation to passages copied from a document.

    The document is tokenized once. At every decoding step only tokens that
    continue an occurrence of the already generated tokens inside the document
    are allowed. Generation may start at the first document token or at any
    token that directly follows a separator token, and it may stop (EOS) right
    after a separator token or when no continuation exists in the document.

    Parameters
    ----------
    tokenizer (PreTrainedTokenizer): Tokenizer used to encode ``doc``; its
        ``eos_token_id`` is used as the stop token.
    doc (str): The text passages are retrieved from.
    split_by_line (bool): If True the separator token is derived from a sample
        text ending in a newline, otherwise from a sample text ending in a period.
    """

    def __init__(self, tokenizer: "PreTrainedTokenizer", doc: str, split_by_line: bool = True):
        self.tokenizer = tokenizer
        self.split_by_line = split_by_line
        self.doc = doc

        # NOTE(review): `text_to_token(..., last=True)` presumably returns the id of the
        # last token of the sample text, i.e. the newline / period token as the tokenizer
        # produces it in context -- confirm against logits_processor_zoo.utils.
        sample_text = "It is a new line\n" if self.split_by_line else "It is a sentence."
        self.sep_token = text_to_token(tokenizer, sample_text, last=True)

        self.doc_tokens = self.tokenizer.encode(self.doc, add_special_tokens=False)
        self._init_start_tokens()

    def _init_start_tokens(self):
        """Collect the tokens a passage may start with into ``self.start_tokens``.

        These are the first document token plus every token that directly
        follows a separator token. An empty document yields an empty list
        instead of raising ``IndexError``.
        """
        if not self.doc_tokens:
            self.start_tokens = []
            return

        self.start_tokens = [self.doc_tokens[0]]
        self.start_tokens.extend(
            token
            for prev_token, token in zip(self.doc_tokens, self.doc_tokens[1:])
            if prev_token == self.sep_token
        )

    def clone(self):
        """Return a fresh processor built from the same tokenizer, document and split mode."""
        return TextRetrievalLogitsProcessor(self.tokenizer, self.doc, self.split_by_line)

    def _find_all_next_tokens(self, past_token_ids):
        """Return the document token following each occurrence of ``past_token_ids``.

        Occurrences that end exactly at the end of the document have no
        follower and are skipped. The result keeps document order and may
        contain duplicates.
        """
        # Normalize to a list: a list slice never compares equal to a tuple.
        prefix = list(past_token_ids)
        gen_len = len(prefix)
        doc = self.doc_tokens
        # Stopping at len(doc) - gen_len guarantees doc[i + gen_len] exists.
        return [doc[i + gen_len] for i in range(len(doc) - gen_len) if doc[i:i + gen_len] == prefix]

    def _allowed_next_tokens(self, past_token_ids):
        """Return the token ids permitted at the current decoding step.

        Always returns a new list, so appending EOS never alters
        ``self.start_tokens``.
        """
        if past_token_ids:
            next_tokens = self._find_all_next_tokens(past_token_ids)
            at_boundary = past_token_ids[-1] == self.sep_token
        else:
            # Nothing generated yet: copy the start tokens and do not look at
            # past_token_ids[-1], which does not exist.
            next_tokens = list(self.start_tokens)
            at_boundary = False

        # Allow stopping after a complete sentence/line, and force stopping
        # when the document offers no continuation.
        if at_boundary or not next_tokens:
            next_tokens.append(self.tokenizer.eos_token_id)
        return next_tokens

    def __call__(self, prompt_tokens_ids: List[int], past_token_ids: List[int], scores: torch.Tensor) -> torch.Tensor:
        """Apply the retrieval constraint to ``scores`` for the next token.

        ``prompt_tokens_ids`` is accepted for interface compatibility and is
        not used. ``past_token_ids`` are the tokens generated so far.
        """
        next_tokens = self._allowed_next_tokens(past_token_ids)
        # NOTE(review): `enforce_tokens` presumably masks every token except
        # `next_tokens` -- confirm against logits_processor_zoo.utils.
        return enforce_tokens(scores, next_tokens)