Provide bugfix for owlv2 in old inference, regarding monkey-patching with torch.compile(...) (#2270)

PawelPeczek-Roboflow · web-flow · commit ccdc35fd1193 · 2026-04-24T22:18:53.000+02:00
* Provide bugfix for owlv2 in old inference, regarding monkey-patching with torch.compile

* Make linters happy
diff --git a/inference/core/version.py b/inference/core/version.py
@@ -1,4 +1,4 @@
-__version__ = "1.2.4"
+__version__ = "1.2.5"
 
 
 if __name__ == "__main__":
diff --git a/inference/models/owlv2/owlv2.py b/inference/models/owlv2/owlv2.py
@@ -53,6 +53,9 @@
     extract_image_payload_and_type,
     load_image_rgb,
 )
+from inference_models.models.owlv2.owlv2_hf import (
+    monkey_patch_vision_encoder_before_compilation,
+)
 
 CPU_IMAGE_EMBED_CACHE_SIZE = OWLV2_CPU_IMAGE_CACHE_SIZE
 PRELOADED_HF_MODELS = {}
@@ -120,6 +123,9 @@ def __new__(cls, huggingface_id: str):
 
             if OWLV2_COMPILE_MODEL:
                 torch._dynamo.config.suppress_errors = True
+                model._model = monkey_patch_vision_encoder_before_compilation(
+                    model._model
+                )
                 model.owlv2.vision_model = torch.compile(model.owlv2.vision_model)
             instance.model = model
             cls._instances[huggingface_id] = instance
diff --git a/inference/models/owlv2/owlv2_inference_models.py b/inference/models/owlv2/owlv2_inference_models.py
@@ -16,7 +16,6 @@
     ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES,
     ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES,
     API_KEY,
-    DEVICE,
     DISABLED_INFERENCE_MODELS_BACKENDS,
     MAX_DETECTIONS,
     OWLV2_COMPILE_MODEL,
@@ -43,7 +42,10 @@
     ReferenceBoundingBox,
     ReferenceExample,
 )
-from inference_models.models.owlv2.owlv2_hf import OWLv2HF
+from inference_models.models.owlv2.owlv2_hf import (
+    OWLv2HF,
+    monkey_patch_vision_encoder_before_compilation,
+)
 
 PRELOADED_HF_MODELS = {}
 
@@ -96,6 +98,9 @@ def __new__(
             if OWLV2_COMPILE_MODEL:
                 logger.info("Compiling OWLv2 model %s", huggingface_id)
                 torch._dynamo.config.suppress_errors = True
+                model._model = monkey_patch_vision_encoder_before_compilation(
+                    model._model
+                )
                 model._model.owlv2.vision_model = torch.compile(
                     model._model.owlv2.vision_model
                 )
diff --git a/inference_models/inference_models/models/owlv2/owlv2_hf.py b/inference_models/inference_models/models/owlv2/owlv2_hf.py
@@ -59,6 +59,8 @@
 def monkey_patch_vision_encoder_before_compilation(
     model: Owlv2ForObjectDetection,
 ) -> Owlv2ForObjectDetection:
+    # IMPORTANT: this function is imported by the `inference` package (the OWLv2 modules under
+    # inference/models/owlv2/). Do not move or rename it - that import must keep working.
+    # The coupling is brittle, but we had no other choice.
     """
     Due to global changes in transformers: https://github.com/huggingface/transformers/pull/43590
     our way of compiling owlv2 vision_model turned out invalid.

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,4 +1,4 @@` |
| `1` | | `-__version__ = "1.2.4"` |
| | `1` | `+__version__ = "1.2.5"` |
| `2` | `2` | |
| `3` | `3` | |
| `4` | `4` | `if __name__ == "__main__":` |