Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions inference/core/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,3 +1058,6 @@
)
else:
DISABLED_INFERENCE_MODELS_BACKENDS = set()

# Tensor-native data representation is opt-in through the environment variable
# of the same name and only takes effect when USE_INFERENCE_MODELS is truthy.
ENABLE_TENSOR_DATA_REPRESENTATION = (
    str2bool(os.getenv("ENABLE_TENSOR_DATA_REPRESENTATION", "False"))
    and USE_INFERENCE_MODELS
)

6 changes: 5 additions & 1 deletion inference/core/managers/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import time
from contextlib import contextmanager
from threading import Lock
from typing import Dict, Generator, List, Optional, Tuple, Union
from typing import Dict, Generator, List, Optional, Tuple, Union, Any

import numpy as np
from fastapi.encoders import jsonable_encoder
Expand Down Expand Up @@ -429,6 +429,10 @@ def model_infer_sync(
model = self._get_model_reference(model_id=model_id)
return model.infer_from_request(request)

def run_tensor_native_inference(self, model_id: str, **kwargs) -> Any:
    """Run tensor-native inference on the model registered under `model_id`.

    Args:
        model_id (str): Identifier used to look up the model reference.
        **kwargs: Forwarded unchanged to the model's
            `run_tensor_native_inference` method.

    Returns:
        Any: Whatever the model's `run_tensor_native_inference` returns.
    """
    return self._get_model_reference(
        model_id=model_id
    ).run_tensor_native_inference(**kwargs)

def make_response(
self, model_id: str, predictions: List[List[float]], *args, **kwargs
) -> InferenceResponse:
Expand Down
5 changes: 4 additions & 1 deletion inference/core/managers/decorators/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, Any

import numpy as np

Expand Down Expand Up @@ -143,6 +143,9 @@ def infer_only(self, model_id: str, request, img_in, img_dims, batch_size=None):
model_id, request, img_in, img_dims, batch_size
)

def run_tensor_native_inference(self, model_id: str, **kwargs) -> Any:
    """Delegate tensor-native inference to the wrapped model manager.

    Args:
        model_id (str): Identifier of the model to run.
        **kwargs: Forwarded unchanged to the wrapped manager.

    Returns:
        Any: The value returned by the wrapped manager.
    """
    wrapped_manager = self.model_manager
    return wrapped_manager.run_tensor_native_inference(model_id, **kwargs)

def preprocess(self, model_id: str, request: InferenceRequest):
"""Processes the preprocessing part of a request.

Expand Down
6 changes: 5 additions & 1 deletion inference/core/managers/decorators/fixed_size_cache.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import gc
from collections import deque
from threading import Lock
from typing import List, Optional
from typing import List, Optional, Any

from inference.core import logger
from inference.core.entities.requests.inference import InferenceRequest
Expand Down Expand Up @@ -225,6 +225,10 @@ def infer_from_request_sync(
self._refresh_model_position_in_a_queue(model_id=model_id)
return super().infer_from_request_sync(model_id, request, **kwargs)

def run_tensor_native_inference(self, model_id: str, **kwargs) -> Any:
    """Run tensor-native inference and refresh the model's queue position.

    Args:
        model_id (str): Identifier of the model to run.
        **kwargs: Forwarded unchanged to the parent implementation.

    Returns:
        Any: The value returned by the parent implementation.
    """
    # The queue position is refreshed before delegating, so the refresh
    # happens even when the inference call itself raises.
    self._refresh_model_position_in_a_queue(model_id=model_id)
    outcome = super().run_tensor_native_inference(model_id, **kwargs)
    return outcome

def infer_only(self, model_id: str, request, img_in, img_dims, batch_size=None):
"""Performs only the inference part of a request and updates the cache.

Expand Down
8 changes: 7 additions & 1 deletion inference/core/managers/decorators/logger.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import Optional, Any

from inference.core.entities.requests.inference import InferenceRequest
from inference.core.entities.responses.inference import InferenceResponse
Expand Down Expand Up @@ -73,6 +73,12 @@ def infer_from_request_sync(
logger.info(f"📥 [{model_id}] res={res}.")
return res

def run_tensor_native_inference(self, model_id: str, **kwargs) -> Any:
    """Run tensor-native inference, logging the request and the result.

    Args:
        model_id (str): Identifier of the model to run.
        **kwargs: Forwarded unchanged to the parent implementation; also
            rendered into the request log line.

    Returns:
        Any: The value returned by the parent implementation.
    """
    request_line = f"📥 [{model_id}] request={kwargs}."
    logger.info(request_line)
    res = super().run_tensor_native_inference(model_id, **kwargs)
    result_line = f"📥 [{model_id}] res={res}."
    logger.info(result_line)
    return res

def remove(self, model_id: str, delete_from_disk: bool = True) -> Model:
"""Removes a model from the manager and logs the action.

Expand Down
3 changes: 3 additions & 0 deletions inference/core/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ def infer(self, image: Any, **kwargs) -> Any:

return postprocessed

def run_tensor_native_inference(self, **kwargs) -> Any:
    """Run inference on tensor inputs; subclasses must override this.

    Args:
        **kwargs: Model-specific inference arguments (unused here).

    Raises:
        NotImplementedError: Always, naming the concrete class so that a
            failure reached through a model manager identifies which model
            type lacks tensor-native support.
    """
    raise NotImplementedError(
        f"{type(self).__name__} does not implement run_tensor_native_inference()"
    )

def preprocess(
self, image: Any, **kwargs
) -> Tuple[np.ndarray, PreprocessReturnMetadata]:
Expand Down
33 changes: 33 additions & 0 deletions inference/core/models/inference_models_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
PreProcessingOverrides,
SemanticSegmentationModel,
)
from inference_models.models.base.semantic_segmentation import SemanticSegmentationResult
from inference_models.models.base.types import InstancesRLEMasks, PreprocessingMetadata
from inference_models.models.common.rle_utils import torch_mask_to_coco_rle

Expand Down Expand Up @@ -120,6 +121,14 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
)
self.class_names = list(self._model.class_names)

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> List[Detections]:
    """Run the wrapped model directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays.
        **kwargs: Inference options; translated by `map_inference_kwargs`
            before being forwarded to the wrapped model.

    Returns:
        List[Detections]: The value returned by calling the wrapped model.
    """
    # `map_inference_kwargs` is declared as taking one positional dict, so
    # the options must be passed as a dict rather than unpacked with `**`
    # (unpacking raises TypeError for any non-empty set of options).
    mapped_kwargs = self.map_inference_kwargs(kwargs)
    return self._model(images, **mapped_kwargs)

def map_inference_kwargs(self, kwargs: dict) -> dict:
kwargs["input_color_format"] = "bgr"
pre_processing_overrides = PreProcessingOverrides(
Expand Down Expand Up @@ -272,6 +281,14 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
)
self.class_names = list(self._model.class_names)

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> List[InstanceDetections]:
    """Run the wrapped model directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays.
        **kwargs: Inference options; translated by `map_inference_kwargs`
            before being forwarded to the wrapped model.

    Returns:
        List[InstanceDetections]: The value returned by calling the wrapped
        model.
    """
    # `map_inference_kwargs` is declared as taking one positional dict, so
    # the options must be passed as a dict rather than unpacked with `**`
    # (unpacking raises TypeError for any non-empty set of options).
    mapped_kwargs = self.map_inference_kwargs(kwargs)
    return self._model(images, **mapped_kwargs)

def map_inference_kwargs(self, kwargs: dict) -> dict:
kwargs["input_color_format"] = "bgr"
pre_processing_overrides = PreProcessingOverrides(
Expand Down Expand Up @@ -692,6 +709,14 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
)
self.class_names = list(self._model.class_names)

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> Union[ClassificationPrediction, List[MultiLabelClassificationPrediction]]:
    """Run the wrapped model directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays.
        **kwargs: Inference options; translated by `map_inference_kwargs`
            before being forwarded to the wrapped model.

    Returns:
        The value returned by calling the wrapped model, annotated as either
        a `ClassificationPrediction` or a list of
        `MultiLabelClassificationPrediction`.
    """
    # `map_inference_kwargs` is declared as taking one positional dict, so
    # the options must be passed as a dict rather than unpacked with `**`
    # (unpacking raises TypeError for any non-empty set of options).
    mapped_kwargs = self.map_inference_kwargs(kwargs)
    return self._model(images, **mapped_kwargs)

def map_inference_kwargs(self, kwargs: dict) -> dict:
kwargs["input_color_format"] = "bgr"
pre_processing_overrides = PreProcessingOverrides(
Expand Down Expand Up @@ -1027,6 +1052,14 @@ def class_map(self):
# match segment.roboflow.com
return {str(k): v for k, v in enumerate(self.class_names)}

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> List[SemanticSegmentationResult]:
    """Run the wrapped model directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays.
        **kwargs: Inference options; translated by `map_inference_kwargs`
            before being forwarded to the wrapped model.

    Returns:
        List[SemanticSegmentationResult]: The value returned by calling the
        wrapped model.
    """
    # `map_inference_kwargs` is declared as taking one positional dict, so
    # the options must be passed as a dict rather than unpacked with `**`
    # (unpacking raises TypeError for any non-empty set of options).
    mapped_kwargs = self.map_inference_kwargs(kwargs)
    return self._model(images, **mapped_kwargs)

def map_inference_kwargs(self, kwargs: dict) -> dict:
kwargs["input_color_format"] = "bgr"
pre_processing_overrides = PreProcessingOverrides(
Expand Down
27 changes: 26 additions & 1 deletion inference/models/clip/clip_inference_models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from time import perf_counter
from typing import Any, Dict, List, Tuple, Union
from typing import Any, Dict, List, Tuple, Union, Literal

import numpy as np
import onnxruntime
import torch
import torch.nn.functional as F

from inference.core.entities.requests.clip import (
ClipCompareRequest,
Expand Down Expand Up @@ -79,6 +81,29 @@ def __init__(
**kwargs,
)

def run_tensor_native_inference(
    self,
    action: Literal["compare", "embed-image", "embed-text"],
    **kwargs
) -> torch.Tensor:
    """Run a CLIP operation and return the raw tensor result.

    Args:
        action: `"embed-image"` forwards `kwargs` to `embed_images`,
            `"embed-text"` forwards them to `embed_text`; any other value
            is handled as a comparison.
        **kwargs: For the embed actions, the arguments of the corresponding
            embedding call. For a comparison: `subject` and `prompt`
            (required), `subject_type` (default `"image"`) and
            `prompt_type` (default `"text"`); any remaining entries are
            forwarded to both embedding calls.

    Returns:
        torch.Tensor: The embeddings for the embed actions, or the matrix of
        cosine similarities (subjects along rows, prompts along columns) for
        a comparison.

    Raises:
        KeyError: If `subject` or `prompt` is missing for a comparison.
    """
    if action == "embed-image":
        return self._model.embed_images(**kwargs)
    if action == "embed-text":
        return self._model.embed_text(**kwargs)

    # These keys steer the comparison itself; forwarding them to the
    # embedding calls would hand those calls arguments they were never
    # meant to receive.
    control_keys = ("subject", "prompt", "subject_type", "prompt_type")
    embed_kwargs = {
        key: value for key, value in kwargs.items() if key not in control_keys
    }
    subject_type = kwargs.get("subject_type", "image")
    prompt_type = kwargs.get("prompt_type", "text")

    if subject_type == "image":
        subject_embeddings = self._model.embed_images(
            images=kwargs["subject"], **embed_kwargs
        )
    else:
        subject_embeddings = self._model.embed_text(
            text=kwargs["subject"], **embed_kwargs
        )
    if prompt_type == "image":
        prompt_embeddings = self._model.embed_images(
            images=kwargs["prompt"], **embed_kwargs
        )
    else:
        prompt_embeddings = self._model.embed_text(
            text=kwargs["prompt"], **embed_kwargs
        )

    # L2-normalise along dim 1 so the matrix product yields cosine similarity.
    subject_embeddings_norm = F.normalize(subject_embeddings, dim=1)
    prompt_embeddings_norm = F.normalize(prompt_embeddings, dim=1)
    return subject_embeddings_norm @ prompt_embeddings_norm.T

def compare(
self,
subject: Any,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, List, Tuple
from typing import Any, List, Tuple, Union
from uuid import uuid4

import cv2
Expand Down Expand Up @@ -60,6 +60,13 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
**kwargs,
)

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> List[torch.Tensor]:
    """Call the wrapped depth model directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays; passed positionally to the wrapped model.
        **kwargs: Forwarded unchanged to the wrapped model.

    Returns:
        List[torch.Tensor]: The value returned by calling the wrapped model.
    """
    depth_model = self._model
    return depth_model(images, **kwargs)

def preprocess(self, image: Any, **kwargs):
if isinstance(image, list):
raise ValueError("DepthAnythingV2 does not support batched inference.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, List, Tuple
from typing import Any, List, Tuple, Union
from uuid import uuid4

import matplotlib.pyplot as plt
Expand Down Expand Up @@ -73,6 +73,13 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
**kwargs,
)

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> List[torch.Tensor]:
    """Call the wrapped depth model directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays; passed positionally to the wrapped model.
        **kwargs: Forwarded unchanged to the wrapped model.

    Returns:
        List[torch.Tensor]: The value returned by calling the wrapped model.
    """
    wrapped_model = self._model
    predictions = wrapped_model(images, **kwargs)
    return predictions

def preprocess(self, image: Any, **kwargs):
if isinstance(image, list):
raise ValueError("DepthAnythingV3 does not support batched inference.")
Expand Down
11 changes: 10 additions & 1 deletion inference/models/doctr/doctr_model_inference_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from time import perf_counter
from typing import Any, List, Tuple, Union

import numpy as np
import torch
from PIL import Image

from inference.core.entities.requests.doctr import DoctrOCRInferenceRequest
Expand All @@ -20,7 +22,7 @@
from inference.core.models.base import Model
from inference.core.roboflow_api import get_extra_weights_provider_headers
from inference.core.utils.image_utils import load_image_bgr
from inference_models import AutoModel
from inference_models import AutoModel, Detections
from inference_models.models.doctr.doctr_torch import DocTR


Expand Down Expand Up @@ -56,6 +58,13 @@ def __init__(
**kwargs,
)

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> Tuple[List[str], List[Detections]]:
    """Run the wrapped OCR model's `infer` directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays; passed to `infer` as the `images` keyword.
        **kwargs: Forwarded unchanged to `infer`.

    Returns:
        Tuple[List[str], List[Detections]]: The value returned by `infer`.
    """
    infer = self._model.infer
    return infer(images=images, **kwargs)

def clear_cache(self, delete_from_disk: bool = True) -> None:
    """Do nothing; this implementation of cache clearing is a no-op.

    Args:
        delete_from_disk (bool): Accepted but not used by this implementation.
    """
    return None

Expand Down
8 changes: 8 additions & 0 deletions inference/models/easy_ocr/easy_ocr_inference_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Any, List, Tuple, Union

import numpy as np
import torch

from inference.core.entities.requests.easy_ocr import EasyOCRInferenceRequest
from inference.core.entities.responses.inference import (
Expand Down Expand Up @@ -63,6 +64,13 @@ def __init__(
**kwargs,
)

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> Tuple[List[str], List[Detections]]:
    """Run the wrapped OCR model's `infer` directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays; passed to `infer` as the `images` keyword.
        **kwargs: Forwarded unchanged to `infer`.

    Returns:
        Tuple[List[str], List[Detections]]: The value returned by `infer`.
    """
    ocr_outcome = self._model.infer(images=images, **kwargs)
    return ocr_outcome

def predict(self, image_in: np.ndarray, **kwargs) -> Tuple[str, Detections]:
parsed_texts, parsed_structures = self._model.infer(images=image_in, **kwargs)
parsed_text = parsed_texts[0]
Expand Down
14 changes: 13 additions & 1 deletion inference/models/florence2/florence2_inference_models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, List
from typing import Any, List, Union

import numpy as np
import torch

from inference.core.entities.responses import (
Expand Down Expand Up @@ -49,6 +50,17 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
**kwargs,
)

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> List[str]:
    """Prompt the wrapped model directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays.
        **kwargs: Inference options; translated by `map_inference_kwargs`
            and then forwarded to the wrapped model's `prompt` call. When
            the translated options carry a non-empty `prompt`, the task is
            its leading segment up to and including the first `>`.

    Returns:
        List[str]: The value returned by the wrapped model's `prompt` call.
    """
    kwargs = self.map_inference_kwargs(kwargs)
    prompt = kwargs.get("prompt")
    # Derive the task only from a non-empty prompt. Appending ">" to the
    # split result always yields a truthy string, so testing the derived
    # task afterwards can never fall back to None.
    task = prompt.split(">")[0] + ">" if prompt else None
    return self._model.prompt(images=images, task=task, **kwargs)

def map_inference_kwargs(self, kwargs: dict) -> dict:
pre_processing_overrides = PreProcessingOverrides(
disable_contrast_enhancement=kwargs.get("disable_preproc_contrast", False),
Expand Down
10 changes: 9 additions & 1 deletion inference/models/glm_ocr/glm_ocr_inference_models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, List, Optional
from typing import Any, List, Optional, Union

import numpy as np
import torch

from inference.core.entities.responses import (
Expand Down Expand Up @@ -50,6 +51,13 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
**kwargs,
)

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> List[str]:
    """Prompt the wrapped model directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays; passed to `prompt` as the `images` keyword.
        **kwargs: Forwarded unchanged to `prompt`.

    Returns:
        List[str]: The value returned by the wrapped model's `prompt` call.
    """
    run_prompt = self._model.prompt
    return run_prompt(images=images, **kwargs)

def preprocess(self, image: Any, prompt: Optional[str] = None, **kwargs):
is_batch = isinstance(image, list)
if is_batch:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from time import perf_counter
from typing import Any, List
from typing import Any, List, Union

import numpy as np
import torch

from inference.core.entities.requests.groundingdino import GroundingDINOInferenceRequest
from inference.core.entities.requests.inference import InferenceRequestImage
Expand All @@ -19,7 +22,7 @@
from inference.core.models.base import Model
from inference.core.roboflow_api import get_extra_weights_provider_headers
from inference.core.utils.image_utils import load_image_bgr, xyxy_to_xywh
from inference_models import AutoModel
from inference_models import AutoModel, Detections
from inference_models.models.grounding_dino.grounding_dino_torch import (
GroundingDinoForObjectDetectionTorch,
)
Expand Down Expand Up @@ -65,6 +68,13 @@ def __init__(
**kwargs,
)

def run_tensor_native_inference(
    self,
    images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
    **kwargs
) -> List[Detections]:
    """Call the wrapped model directly on tensor / array inputs.

    Args:
        images: A single image or a list of images, as torch tensors or
            numpy arrays; passed to the model as the `images` keyword.
        **kwargs: Forwarded unchanged to the wrapped model.

    Returns:
        List[Detections]: The value returned by calling the wrapped model.
    """
    detector = self._model
    return detector(images=images, **kwargs)

def preproc_image(self, image: Any):
"""Preprocesses an image.

Expand Down
Loading
Loading