Skip to content

Commit 4bf797a

Browse files
Merge branch 'openvino-update' - upstream PR michaelfeil#454 OpenVINO support
2 parents bb80406 + 7bbdce8 commit 4bf797a

8 files changed

Lines changed: 453 additions & 59 deletions

File tree

libs/infinity_emb/Docker.template.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,25 @@ cpu:
2323
main_install: |
2424
# "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
2525
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
26+
# git is needed because optimum-intel is installed straight from its GitHub repository below.
# apt-get (stable CLI) is used instead of apt, and the package lists are removed in the same layer.
RUN apt-get update -y && apt-get install --no-install-recommends -y ca-certificates git && rm -rf /var/lib/apt/lists/*
2627
RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
2728
RUN poetry run python -m pip install --no-cache-dir onnxruntime-openvino
29+
# Every requirement is quoted: an unquoted `>` is a shell redirection (nncf>=2.11.0 would install
# unpinned nncf and write pip's output to a file named "=2.11.0"), and `[...]` / `*` are glob patterns.
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" poetry run python -m pip install --no-cache-dir -U --pre "optimum-intel@git+https://github.com/huggingface/optimum-intel.git" \
30+
"openvino-tokenizers[transformers]==2024.5.*" \
31+
"openvino==2024.5.*" \
32+
"nncf>=2.11.0" \
33+
"sentence_transformers==3.1.1" \
34+
openai \
35+
"transformers>4.45" \
36+
einops
37+
# RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://storage.openvinotoolkit.org/simple/wheels/nightly" poetry run python -m pip install -U --pre optimum-intel@git+https://github.com/huggingface/optimum-intel.git \
38+
# openvino-tokenizers[transformers]==2024.5.* \
39+
# openvino==2024.5.* \
40+
# nncf>=2.11.0 \
41+
# sentence_transformers==3.1.1 \
42+
# openai \
43+
# "transformers>4.45" \
44+
# einops
2845
extra_env_variables: |
2946
# Sets default to onnx
3047
ENV INFINITY_ENGINE="optimum"

libs/infinity_emb/Dockerfile.cpu_auto

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,49 @@ COPY poetry.lock poetry.toml pyproject.toml README.md /app/
4141
#
4242
# "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
4343
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
44+
# git is needed because optimum-intel is installed straight from its GitHub repository below.
# apt-get (stable CLI) is used instead of apt, and the package lists are removed in the same layer.
RUN apt-get update -y && apt-get install --no-install-recommends -y ca-certificates git && rm -rf /var/lib/apt/lists/*
4445
RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
4546
RUN poetry run python -m pip install --no-cache-dir onnxruntime-openvino
47+
# Every requirement is quoted: an unquoted `>` is a shell redirection (nncf>=2.11.0 would install
# unpinned nncf and write pip's output to a file named "=2.11.0"), and `[...]` / `*` are glob patterns.
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" poetry run python -m pip install --no-cache-dir -U --pre "optimum-intel@git+https://github.com/huggingface/optimum-intel.git" \
48+
"openvino-tokenizers[transformers]==2024.5.*" \
49+
"openvino==2024.5.*" \
50+
"nncf>=2.11.0" \
51+
"sentence_transformers==3.1.1" \
52+
openai \
53+
"transformers>4.45" \
54+
einops
55+
# RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://storage.openvinotoolkit.org/simple/wheels/nightly" poetry run python -m pip install -U --pre optimum-intel@git+https://github.com/huggingface/optimum-intel.git \
56+
# openvino-tokenizers[transformers]==2024.5.* \
57+
# openvino==2024.5.* \
58+
# nncf>=2.11.0 \
59+
# sentence_transformers==3.1.1 \
60+
# openai \
61+
# "transformers>4.45" \
62+
# einops
4663

4764
COPY infinity_emb infinity_emb
4865
# Install dependency with infinity_emb package
4966
# "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
5067
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
68+
# git is needed because optimum-intel is installed straight from its GitHub repository below.
# apt-get (stable CLI) is used instead of apt, and the package lists are removed in the same layer.
RUN apt-get update -y && apt-get install --no-install-recommends -y ca-certificates git && rm -rf /var/lib/apt/lists/*
5169
RUN ./requirements_install_from_poetry.sh --without lint,test "https://download.pytorch.org/whl/cpu"
5270
RUN poetry run python -m pip install --no-cache-dir onnxruntime-openvino
71+
# Every requirement is quoted: an unquoted `>` is a shell redirection (nncf>=2.11.0 would install
# unpinned nncf and write pip's output to a file named "=2.11.0"), and `[...]` / `*` are glob patterns.
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" poetry run python -m pip install --no-cache-dir -U --pre "optimum-intel@git+https://github.com/huggingface/optimum-intel.git" \
72+
"openvino-tokenizers[transformers]==2024.5.*" \
73+
"openvino==2024.5.*" \
74+
"nncf>=2.11.0" \
75+
"sentence_transformers==3.1.1" \
76+
openai \
77+
"transformers>4.45" \
78+
einops
79+
# RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://storage.openvinotoolkit.org/simple/wheels/nightly" poetry run python -m pip install -U --pre optimum-intel@git+https://github.com/huggingface/optimum-intel.git \
80+
# openvino-tokenizers[transformers]==2024.5.* \
81+
# openvino==2024.5.* \
82+
# nncf>=2.11.0 \
83+
# sentence_transformers==3.1.1 \
84+
# openai \
85+
# "transformers>4.45" \
86+
# einops
5387

5488
#
5589
# TODO: remove this line
@@ -59,8 +93,25 @@ FROM builder AS testing
5993
# install lint and test dependencies
6094
# "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --with lint,test && poetry cache clear pypi --all"
6195
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
96+
# git is needed because optimum-intel is installed straight from its GitHub repository below.
# apt-get (stable CLI) is used instead of apt, and the package lists are removed in the same layer.
RUN apt-get update -y && apt-get install --no-install-recommends -y ca-certificates git && rm -rf /var/lib/apt/lists/*
6297
RUN ./requirements_install_from_poetry.sh --with lint,test "https://download.pytorch.org/whl/cpu"
6398
RUN poetry run python -m pip install --no-cache-dir onnxruntime-openvino
99+
# Every requirement is quoted: an unquoted `>` is a shell redirection (nncf>=2.11.0 would install
# unpinned nncf and write pip's output to a file named "=2.11.0"), and `[...]` / `*` are glob patterns.
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" poetry run python -m pip install --no-cache-dir -U --pre "optimum-intel@git+https://github.com/huggingface/optimum-intel.git" \
100+
"openvino-tokenizers[transformers]==2024.5.*" \
101+
"openvino==2024.5.*" \
102+
"nncf>=2.11.0" \
103+
"sentence_transformers==3.1.1" \
104+
openai \
105+
"transformers>4.45" \
106+
einops
107+
# RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://storage.openvinotoolkit.org/simple/wheels/nightly" poetry run python -m pip install -U --pre optimum-intel@git+https://github.com/huggingface/optimum-intel.git \
108+
# openvino-tokenizers[transformers]==2024.5.* \
109+
# openvino==2024.5.* \
110+
# nncf>=2.11.0 \
111+
# sentence_transformers==3.1.1 \
112+
# openai \
113+
# "transformers>4.45" \
114+
# einops
64115

65116
# lint
66117
RUN poetry run ruff check .
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# Autogenerated warning:
2+
# This file is generated from Dockerfile.jinja2. Do not edit the Dockerfile.cuda|cpu|amd file directly.
3+
# Only contribute to the Dockerfile.jinja2 and Docker.template.yaml and regenerate the Dockerfile.cuda|cpu|amd
4+
5+
FROM ubuntu:22.04 AS base
6+
7+
ENV PYTHONUNBUFFERED=1 \
8+
\
9+
# pip
10+
PIP_NO_CACHE_DIR=off \
11+
PIP_DISABLE_PIP_VERSION_CHECK=on \
12+
PIP_DEFAULT_TIMEOUT=100 \
13+
\
14+
# make poetry create the virtual environment in the project's root
15+
# it gets named `.venv`
16+
POETRY_VIRTUALENVS_CREATE="true" \
17+
POETRY_VIRTUALENVS_IN_PROJECT="true" \
18+
# do not ask any interactive question
19+
POETRY_NO_INTERACTION=1 \
20+
EXTRAS="all" \
21+
PYTHON="python3.11"
22+
# Install the build toolchain, libsndfile, the selected Python interpreter (with venv support) and curl.
# The apt package lists are deleted in the same layer so they are not stored in the image.
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl && rm -rf /var/lib/apt/lists/*
23+
WORKDIR /app
24+
25+
FROM base AS builder
26+
# The working directory (/app) is inherited from the base stage
27+
# Define the version of Poetry to install (default is 1.8.4)
28+
# Define the directory to install Poetry to (default is /opt/poetry)
29+
ARG POETRY_VERSION=1.8.4
30+
ARG POETRY_HOME=/opt/poetry
31+
# Create a Python virtual environment for Poetry and install it
32+
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON -
33+
ENV PATH=$POETRY_HOME/bin:$PATH
34+
# Test if Poetry is installed in the expected path
35+
RUN echo "Poetry version:" && poetry --version
36+
# Copy only the dependency manifests and README (this layer is invalidated and rebuilt whenever one of these files changes)
37+
COPY poetry.lock poetry.toml pyproject.toml README.md /app/
38+
# Install dependencies only
39+
#
40+
# "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
41+
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
42+
RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
43+
44+
RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"
45+
46+
COPY infinity_emb infinity_emb
47+
# Install dependency with infinity_emb package
48+
# "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
49+
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
50+
RUN ./requirements_install_from_poetry.sh --without lint,test "https://download.pytorch.org/whl/cpu"
51+
52+
#
53+
54+
55+
# Testing stage: extends the builder stage with the lint and test dependency groups.
FROM builder AS testing
56+
# install lint and test dependencies
57+
# "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --with lint,test && poetry cache clear pypi --all"
58+
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
59+
RUN ./requirements_install_from_poetry.sh --with lint,test "https://download.pytorch.org/whl/cpu"
60+
61+
# # lint
62+
# # RUN poetry run ruff check .
63+
# # RUN poetry run mypy .
64+
# # pytest
65+
# COPY tests tests
66+
# # run end to end tests because of duration of build in github ci.
67+
# # Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu
68+
# # poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
69+
# RUN if [ -z "$TARGETPLATFORM" ]; then \
70+
# ARCH=$(uname -m); \
71+
# if [ "$ARCH" = "x86_64" ]; then \
72+
# TARGETPLATFORM="linux/amd64"; \
73+
# elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
74+
# TARGETPLATFORM="linux/arm64"; \
75+
# else \
76+
# echo "Unsupported architecture: $ARCH"; exit 1; \
77+
# fi; \
78+
# fi; \
79+
# echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
80+
# if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
81+
# poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
82+
# else \
83+
# poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py tests/end_to_end/test_sentence_transformers.py -m "not performance" -x ; \
84+
# fi
85+
# RUN echo "all tests passed" > "test_results.txt"
86+
87+
88+
# # Use a multi-stage build -> production version, with download
89+
# FROM base AS tested-builder
90+
# COPY --from=builder /app /app
91+
# # force testing stage to run
92+
# COPY --from=testing /app/test_results.txt /app/test_results.txt
93+
# ENV HF_HOME=/app/.cache/huggingface
94+
# ENV PATH=/app/.venv/bin:$PATH
95+
# # do nothing
96+
# RUN echo "copied all files"
97+
98+
99+
# Export with tensorrt, not recommended.
100+
# docker buildx build --target=production-tensorrt -f Dockerfile .
101+
# FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt
102+
# ENV PYTHONUNBUFFERED=1 \
103+
# PIP_NO_CACHE_DIR=off \
104+
# PYTHON="python3.11"
105+
# RUN apt-get update && apt-get install python3-dev python3-pip $PYTHON build-essential curl -y
106+
# COPY --from=builder /app /app
107+
# # force testing stage to run
108+
# COPY --from=testing /app/test_results.txt /app/test_results.txt
109+
# ENV HF_HOME=/app/.cache/torch
110+
# ENV PATH=/app/.venv/bin:$PATH
111+
# RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*"
112+
# ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
113+
# ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH}
114+
# ENTRYPOINT ["infinity_emb"]
115+
116+
117+
# # Use a multi-stage build -> production version, with download
118+
# # docker buildx build --target=production-with-download \
119+
# # --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
120+
# FROM tested-builder AS production-with-download
121+
# # collect model name and engine from build args
122+
# ARG MODEL_NAME
123+
# RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
124+
# ARG ENGINE
125+
# RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
126+
# # will exit with 3 if model is downloaded # TODO: better exit code
127+
# RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
128+
# ENTRYPOINT ["infinity_emb"]
129+
130+
# # Use a multi-stage build -> production version
131+
# FROM tested-builder AS production
132+
# ENTRYPOINT ["infinity_emb"]

libs/infinity_emb/infinity_emb/_optional_imports.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def _raise_error(self) -> None:
6969
"optimum.neuron",
7070
"<neuronx not available as extra, only runs on AMI image, no pip install possible.>",
7171
)
72+
CHECK_OPTIMUM_INTEL = OptionalImports("optimum.intel", "optimum")
7273
CHECK_PIL = OptionalImports("PIL", "vision")
7374
CHECK_POSTHOG = OptionalImports("posthog", "server")
7475
CHECK_PYDANTIC = OptionalImports("pydantic", "server")

libs/infinity_emb/infinity_emb/primitives.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def default_value():
106106

107107
class Device(EnumType):
108108
cpu = "cpu"
109+
openvino = "openvino"
109110
cuda = "cuda"
110111
mps = "mps"
111112
tensorrt = "tensorrt"

libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py

Lines changed: 63 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,19 @@
55

66
import numpy as np
77

8-
from infinity_emb._optional_imports import CHECK_ONNXRUNTIME, CHECK_TRANSFORMERS
8+
from infinity_emb._optional_imports import (
9+
CHECK_ONNXRUNTIME,
10+
CHECK_TRANSFORMERS,
11+
CHECK_OPTIMUM_INTEL,
12+
)
913
from infinity_emb.args import EngineArgs
1014
from infinity_emb.primitives import EmbeddingReturnType, PoolingMethod
1115
from infinity_emb.transformer.abstract import BaseEmbedder
1216
from infinity_emb.transformer.quantization.interface import quant_embedding_decorator
1317
from infinity_emb.transformer.utils_optimum import (
1418
cls_token_pooling,
1519
device_to_onnx,
16-
get_onnx_files,
20+
# get_onnx_files,
1721
mean_pooling,
1822
normalize,
1923
optimize_model,
@@ -24,40 +28,80 @@
2428
from optimum.onnxruntime import ( # type: ignore[import-untyped]
2529
ORTModelForFeatureExtraction,
2630
)
31+
from infinity_emb.transformer.utils_optimum import get_onnx_files
2732

2833
except (ImportError, RuntimeError, Exception) as ex:
2934
CHECK_ONNXRUNTIME.mark_dirty(ex)
3035

36+
37+
# Optional OpenVINO support: only attempt the imports when CHECK_OPTIMUM_INTEL reports
# optimum.intel as available. Any exception raised by the imports is handed to
# CHECK_OPTIMUM_INTEL.mark_dirty instead of propagating at module import time.
# NOTE(review): ImportError and RuntimeError are subclasses of Exception, so the
# tuple below is equivalent to catching Exception alone.
if CHECK_OPTIMUM_INTEL.is_available:
38+
try:
39+
from optimum.intel import OVModelForFeatureExtraction # type: ignore[import-untyped]
40+
from infinity_emb.transformer.utils_optimum import get_openvino_files
41+
42+
except (ImportError, RuntimeError, Exception) as ex:
43+
CHECK_OPTIMUM_INTEL.mark_dirty(ex)
44+
45+
3146
if CHECK_TRANSFORMERS.is_available:
3247
from transformers import AutoConfig, AutoTokenizer # type: ignore[import-untyped]
3348

3449

3550
class OptimumEmbedder(BaseEmbedder):
3651
def __init__(self, *, engine_args: EngineArgs):
37-
CHECK_ONNXRUNTIME.mark_required()
3852
provider = device_to_onnx(engine_args.device)
53+
self.provider = provider
54+
55+
if provider == "OpenVINOExecutionProvider":
56+
CHECK_OPTIMUM_INTEL.mark_required()
57+
filename = ""
58+
try:
59+
openvino_file = get_openvino_files(
60+
model_name_or_path=engine_args.model_name_or_path,
61+
revision=engine_args.revision,
62+
use_auth_token=True,
63+
)
64+
filename = openvino_file.as_posix()
65+
except Exception as e: # show error then let the optimum intel compress on the fly
66+
print(str(e))
67+
68+
self.model = optimize_model(
69+
model_name_or_path=engine_args.model_name_or_path,
70+
revision=engine_args.revision,
71+
trust_remote_code=engine_args.trust_remote_code,
72+
execution_provider=provider,
73+
file_name=filename,
74+
optimize_model=not os.environ.get(
75+
"INFINITY_ONNX_DISABLE_OPTIMIZE", False
76+
), # TODO: make this env variable public
77+
model_class=OVModelForFeatureExtraction,
78+
)
3979

40-
onnx_file = get_onnx_files(
41-
model_name_or_path=engine_args.model_name_or_path,
42-
revision=engine_args.revision,
43-
use_auth_token=True,
44-
prefer_quantized=("cpu" in provider.lower() or "openvino" in provider.lower()) and not engine_args.onnx_do_not_prefer_quantized,
45-
)
80+
elif provider == "CPUExecutionProvider":
81+
CHECK_ONNXRUNTIME.mark_required()
82+
onnx_file = get_onnx_files(
83+
model_name_or_path=engine_args.model_name_or_path,
84+
revision=engine_args.revision,
85+
use_auth_token=True,
86+
prefer_quantized="cpu" in provider.lower(),
87+
)
88+
self.model = optimize_model(
89+
model_name_or_path=engine_args.model_name_or_path,
90+
revision=engine_args.revision,
91+
trust_remote_code=engine_args.trust_remote_code,
92+
execution_provider=provider,
93+
file_name=onnx_file.as_posix(),
94+
optimize_model=not os.environ.get(
95+
"INFINITY_ONNX_DISABLE_OPTIMIZE", False
96+
), # TODO: make this env variable public
97+
model_class=ORTModelForFeatureExtraction,
98+
)
99+
self.model.use_io_binding = False
46100

47101
self.pooling = (
48102
mean_pooling if engine_args.pooling_method == PoolingMethod.mean else cls_token_pooling
49103
)
50104

51-
self.model = optimize_model(
52-
model_name_or_path=engine_args.model_name_or_path,
53-
revision=engine_args.revision,
54-
trust_remote_code=engine_args.trust_remote_code,
55-
execution_provider=provider,
56-
file_name=onnx_file.as_posix(),
57-
optimize_model=not engine_args.onnx_disable_optimize,
58-
model_class=ORTModelForFeatureExtraction,
59-
)
60-
self.model.use_io_binding = False
61105

62106
self.tokenizer = AutoTokenizer.from_pretrained(
63107
engine_args.model_name_or_path,

0 commit comments

Comments
 (0)