Skip to content

Commit 5dbc93e

Browse files
authored
Merge pull request #1232 from llmware-ai/update-010426-onnx-vision
update onnx vision model support
2 parents 9d2f69f + dafb176 commit 5dbc93e

3 files changed

Lines changed: 421 additions & 2 deletions

File tree

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
""" This example shows how to use a multimedia vision-to-text model with onnxruntime -
2+
3+
to run, pip install onnxruntime_genai
4+
"""
5+
6+
from llmware.models import ModelCatalog
7+
8+
model = ModelCatalog().load_model("phi-3-vision-onnx")
9+
10+
# supported image types: jpg, png
11+
img_path = "/path/to/local/image"
12+
13+
# to run a streaming response
14+
for token in model.stream("Describe this image",img_path):
15+
print(token, end="")
16+
17+
# to get the complete response only after generation has finished
18+
response = model.inference("Describe this image", img_path)
19+
print("--vision response - ", response)

llmware/model_configs.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3565,8 +3565,20 @@
35653565
"model_family": "ONNXEmbeddingModel", "model_category": "embedding",
35663566
"model_location": "llmware_repo", "use_case": "classifier",
35673567
"embedding_dims": 768, "context_window": 512, "link": "https://none",
3568-
"custom_model_repo": "", "hf_repo": "llmware/unitary-unbiased-toxic-roberta-onnx"}
3568+
"custom_model_repo": "", "hf_repo": "llmware/unitary-unbiased-toxic-roberta-onnx"},
35693569

3570+
{"model_name": "phi-3-vision-onnx", "display_name": "phi-3-vision-3b",
3571+
"model_family": "ONNXVisionGenerativeModel", "model_category": "generative_local",
3572+
"model_location": "llmware_repo", "context_window": 4096, "instruction_following": False,
3573+
"prompt_wrapper": "phi_3_vision", "temperature": 0.0, "trailing_space": "",
3574+
"hf_repo": "llmware/phi-3-vision-onnx",
3575+
"link": "https://huggingface.co/llmware/phi-3-vision-onnx",
3576+
"tokenizer_local": "tokenizer_phi3.json",
3577+
"fetch": {"module": "llmware.models", "method": "pull_snapshot_from_hf"},
3578+
"validation_files": ["phi-3-v-128k-instruct-text.onnx.data",
3579+
"phi-3-v-128k-instruct-vision.onnx.data",
3580+
"phi-3-v-128k-instruct-embedding.onnx.data"],
3581+
"custom_model_files": [], "custom_model_repo": "", "parameters": 3.8}
35703582

35713583
]
35723584

@@ -3630,6 +3642,12 @@
36303642
"phi_3": {"system_start": "<|system|>\n", "system_stop": "<|end|>\n",
36313643
"main_start": "<|user|>\n", "main_stop": "<|end|>\n", "start_llm_response": "<|assistant|>"},
36323644

3645+
# currently intended for embedding a single image only
3646+
"phi_3_vision": {"system_start": "", "system_stop": "",
3647+
"main_start": "<|user|>\n<|image_1|>\n",
3648+
"main_stop": "<|end|>\n",
3649+
"start_llm_response": "<|assistant|>\n"},
3650+
36333651
"phi_4": {"system_start": "<|im_start|>system<|im_sep|>\n",
36343652
"system_stop": "<|im_end|>\n",
36353653
"main_start": "<|im_start|>user<|im_sep|>\n",

0 commit comments

Comments
 (0)