roboflow · PawelPeczek-Roboflow · May 20, 2026 · May 20, 2026 · May 20, 2026 · May 20, 2026
@@ -1058,3 +1058,6 @@
         )
 else:
     DISABLED_INFERENCE_MODELS_BACKENDS = set()
+
+ENABLE_TENSOR_DATA_REPRESENTATION = str2bool(os.getenv("ENABLE_TENSOR_DATA_REPRESENTATION", "False")) and USE_INFERENCE_MODELS
+
@@ -1,7 +1,7 @@
 import time
 from contextlib import contextmanager
 from threading import Lock
-from typing import Dict, Generator, List, Optional, Tuple, Union
+from typing import Dict, Generator, List, Optional, Tuple, Union, Any
 
 import numpy as np
 from fastapi.encoders import jsonable_encoder
@@ -429,6 +429,10 @@ def model_infer_sync(
         model = self._get_model_reference(model_id=model_id)
         return model.infer_from_request(request)
 
+    def run_tensor_native_inference(self, model_id: str, **kwargs) -> Any:
+        model = self._get_model_reference(model_id=model_id)
+        return model.run_tensor_native_inference(**kwargs)
+
     def make_response(
         self, model_id: str, predictions: List[List[float]], *args, **kwargs
     ) -> InferenceResponse:

@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, Any
 
 import numpy as np
 
@@ -143,6 +143,9 @@ def infer_only(self, model_id: str, request, img_in, img_dims, batch_size=None):
             model_id, request, img_in, img_dims, batch_size
         )
 
+    def run_tensor_native_inference(self, model_id: str, **kwargs) -> Any:
+        return self.model_manager.run_tensor_native_inference(model_id, **kwargs)
+
     def preprocess(self, model_id: str, request: InferenceRequest):
         """Processes the preprocessing part of a request.
 

@@ -1,7 +1,7 @@
 import gc
 from collections import deque
 from threading import Lock
-from typing import List, Optional
+from typing import List, Optional, Any
 
 from inference.core import logger
 from inference.core.entities.requests.inference import InferenceRequest
@@ -225,6 +225,10 @@ def infer_from_request_sync(
         self._refresh_model_position_in_a_queue(model_id=model_id)
         return super().infer_from_request_sync(model_id, request, **kwargs)
 
+    def run_tensor_native_inference(self, model_id: str, **kwargs) -> Any:
+        self._refresh_model_position_in_a_queue(model_id=model_id)
+        return super().run_tensor_native_inference(model_id, **kwargs)
+
     def infer_only(self, model_id: str, request, img_in, img_dims, batch_size=None):
         """Performs only the inference part of a request and updates the cache.
 

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Any
 
 from inference.core.entities.requests.inference import InferenceRequest
 from inference.core.entities.responses.inference import InferenceResponse
@@ -73,6 +73,12 @@ def infer_from_request_sync(
         logger.info(f"📥 [{model_id}] res={res}.")
         return res
 
+    def run_tensor_native_inference(self, model_id: str, **kwargs) -> Any:
+        logger.info(f"📥 [{model_id}] request={kwargs}.")
+        res = super().run_tensor_native_inference(model_id, **kwargs)
+        logger.info(f"📥 [{model_id}] res={res}.")
+        return res
+
     def remove(self, model_id: str, delete_from_disk: bool = True) -> Model:
         """Removes a model from the manager and logs the action.
 

@@ -40,6 +40,9 @@ def infer(self, image: Any, **kwargs) -> Any:
 
         return postprocessed
 
+    def run_tensor_native_inference(self, **kwargs) -> Any:
+        raise NotImplementedError
+
     def preprocess(
         self, image: Any, **kwargs
     ) -> Tuple[np.ndarray, PreprocessReturnMetadata]:

@@ -60,6 +60,7 @@
     PreProcessingOverrides,
     SemanticSegmentationModel,
 )
+from inference_models.models.base.semantic_segmentation import SemanticSegmentationResult
 from inference_models.models.base.types import InstancesRLEMasks, PreprocessingMetadata
 from inference_models.models.common.rle_utils import torch_mask_to_coco_rle
 
@@ -120,6 +121,14 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
         )
         self.class_names = list(self._model.class_names)
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> List[Detections]:
+        kwargs = self.map_inference_kwargs(**kwargs)
+        return self._model(images, **kwargs)
+
     def map_inference_kwargs(self, kwargs: dict) -> dict:
         kwargs["input_color_format"] = "bgr"
         pre_processing_overrides = PreProcessingOverrides(
@@ -272,6 +281,14 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
         )
         self.class_names = list(self._model.class_names)
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> List[InstanceDetections]:
+        kwargs = self.map_inference_kwargs(**kwargs)
+        return self._model(images, **kwargs)
+
     def map_inference_kwargs(self, kwargs: dict) -> dict:
         kwargs["input_color_format"] = "bgr"
         pre_processing_overrides = PreProcessingOverrides(
@@ -692,6 +709,14 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
         )
         self.class_names = list(self._model.class_names)
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> Union[ClassificationPrediction, List[MultiLabelClassificationPrediction]]:
+        kwargs = self.map_inference_kwargs(**kwargs)
+        return self._model(images, **kwargs)
+
     def map_inference_kwargs(self, kwargs: dict) -> dict:
         kwargs["input_color_format"] = "bgr"
         pre_processing_overrides = PreProcessingOverrides(
@@ -1027,6 +1052,14 @@ def class_map(self):
         # match segment.roboflow.com
         return {str(k): v for k, v in enumerate(self.class_names)}
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> List[SemanticSegmentationResult]:
+        kwargs = self.map_inference_kwargs(**kwargs)
+        return self._model(images, **kwargs)
+
     def map_inference_kwargs(self, kwargs: dict) -> dict:
         kwargs["input_color_format"] = "bgr"
         pre_processing_overrides = PreProcessingOverrides(

@@ -1,8 +1,10 @@
 from time import perf_counter
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Dict, List, Tuple, Union, Literal
 
 import numpy as np
 import onnxruntime
+import torch
+import torch.nn.functional as F
 
 from inference.core.entities.requests.clip import (
     ClipCompareRequest,
@@ -79,6 +81,29 @@ def __init__(
             **kwargs,
         )
 
+    def run_tensor_native_inference(
+        self,
+        action: Literal["compare", "embed-image", "embed-text"],
+        **kwargs
+    ) -> torch.Tensor:
+        if action == "embed-image":
+            return self._model.embed_images(**kwargs)
+        elif action == "embed-text":
+            return self._model.embed_text(**kwargs)
+        subject_type = kwargs.get("subject_type", "image")
+        prompt_type = kwargs.get("prompt_type", "text")
+        if subject_type == "image":
+            subject_embeddings = self._model.embed_images(images=kwargs["subject"], **kwargs)
+        else:
+            subject_embeddings = self._model.embed_text(text=kwargs["subject"], **kwargs)
+        if prompt_type == "image":
+            prompt_embeddings = self._model.embed_images(images=kwargs["prompt"], **kwargs)
+        else:
+            prompt_embeddings = self._model.embed_text(text=kwargs["prompt"], **kwargs)
+        subject_embeddings_norm = F.normalize(subject_embeddings, dim=1)
+        prompt_embeddings_norm = F.normalize(prompt_embeddings, dim=1)
+        return subject_embeddings_norm @ prompt_embeddings_norm.T
+
     def compare(
         self,
         subject: Any,

@@ -1,4 +1,4 @@
-from typing import Any, List, Tuple
+from typing import Any, List, Tuple, Union
 from uuid import uuid4
 
 import cv2
@@ -60,6 +60,13 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
             **kwargs,
         )
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> List[torch.Tensor]:
+        return self._model(images, **kwargs)
+
     def preprocess(self, image: Any, **kwargs):
         if isinstance(image, list):
             raise ValueError("DepthAnythingV2 does not support batched inference.")

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, List, Tuple
+from typing import Any, List, Tuple, Union
 from uuid import uuid4
 
 import matplotlib.pyplot as plt
@@ -73,6 +73,13 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
             **kwargs,
         )
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> List[torch.Tensor]:
+        return self._model(images, **kwargs)
+
     def preprocess(self, image: Any, **kwargs):
         if isinstance(image, list):
             raise ValueError("DepthAnythingV3 does not support batched inference.")

@@ -2,6 +2,8 @@
 from time import perf_counter
 from typing import Any, List, Tuple, Union
 
+import numpy as np
+import torch
 from PIL import Image
 
 from inference.core.entities.requests.doctr import DoctrOCRInferenceRequest
@@ -20,7 +22,7 @@
 from inference.core.models.base import Model
 from inference.core.roboflow_api import get_extra_weights_provider_headers
 from inference.core.utils.image_utils import load_image_bgr
-from inference_models import AutoModel
+from inference_models import AutoModel, Detections
 from inference_models.models.doctr.doctr_torch import DocTR
 
 
@@ -56,6 +58,13 @@ def __init__(
             **kwargs,
         )
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> Tuple[List[str], List[Detections]]:
+        return self._model.infer(images=images, **kwargs)
+
     def clear_cache(self, delete_from_disk: bool = True) -> None:
         pass
 

@@ -4,6 +4,7 @@
 from typing import Any, List, Tuple, Union
 
 import numpy as np
+import torch
 
 from inference.core.entities.requests.easy_ocr import EasyOCRInferenceRequest
 from inference.core.entities.responses.inference import (
@@ -63,6 +64,13 @@ def __init__(
             **kwargs,
         )
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> Tuple[List[str], List[Detections]]:
+        return self._model.infer(images=images, **kwargs)
+
     def predict(self, image_in: np.ndarray, **kwargs) -> Tuple[str, Detections]:
         parsed_texts, parsed_structures = self._model.infer(images=image_in, **kwargs)
         parsed_text = parsed_texts[0]

@@ -1,5 +1,6 @@
-from typing import Any, List
+from typing import Any, List, Union
 
+import numpy as np
 import torch
 
 from inference.core.entities.responses import (
@@ -49,6 +50,17 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
             **kwargs,
         )
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> List[str]:
+        kwargs = self.map_inference_kwargs(kwargs)
+        task = kwargs.get("prompt", "").split(">")[0] + ">"
+        if not task:
+            task = None
+        return self._model.prompt(images=images, task=task, **kwargs)
+
     def map_inference_kwargs(self, kwargs: dict) -> dict:
         pre_processing_overrides = PreProcessingOverrides(
             disable_contrast_enhancement=kwargs.get("disable_preproc_contrast", False),

@@ -1,5 +1,6 @@
-from typing import Any, List, Optional
+from typing import Any, List, Optional, Union
 
+import numpy as np
 import torch
 
 from inference.core.entities.responses import (
@@ -50,6 +51,13 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
             **kwargs,
         )
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> List[str]:
+        return self._model.prompt(images=images, **kwargs)
+
     def preprocess(self, image: Any, prompt: Optional[str] = None, **kwargs):
         is_batch = isinstance(image, list)
         if is_batch:

@@ -1,5 +1,8 @@
 from time import perf_counter
-from typing import Any, List
+from typing import Any, List, Union
+
+import numpy as np
+import torch
 
 from inference.core.entities.requests.groundingdino import GroundingDINOInferenceRequest
 from inference.core.entities.requests.inference import InferenceRequestImage
@@ -19,7 +22,7 @@
 from inference.core.models.base import Model
 from inference.core.roboflow_api import get_extra_weights_provider_headers
 from inference.core.utils.image_utils import load_image_bgr, xyxy_to_xywh
-from inference_models import AutoModel
+from inference_models import AutoModel, Detections
 from inference_models.models.grounding_dino.grounding_dino_torch import (
     GroundingDinoForObjectDetectionTorch,
 )
@@ -65,6 +68,13 @@ def __init__(
             **kwargs,
         )
 
+    def run_tensor_native_inference(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs
+    ) -> List[Detections]:
+        return self._model(images=images, **kwargs)
+
     def preproc_image(self, image: Any):
         """Preprocesses an image.
-Original file line number
+Diff line change
@@ Expand Up / @@ -1058,3 +1058,6 @@ @@
             )
     else:
         DISABLED_INFERENCE_MODELS_BACKENDS = set()
+    ENABLE_TENSOR_DATA_REPRESENTATION = str2bool(os.getenv("ENABLE_TENSOR_DATA_REPRESENTATION", "False")) and USE_INFERENCE_MODELS