Commit 74798e7

Committed by: leofang, emcastillo, claude
Cache type check in _is_torch_tensor for ~20% speedup
Cache the result of the torch tensor type check (module + hasattr + version) keyed by type(obj). Subsequent calls for the same type are a single dict lookup (~76 ns) instead of the full check (~186 ns). Non-torch objects also benefit, as the cache returns False immediately after the first miss.

Co-Authored-By: Emilio Castillo <ecastillo@nvidia.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0f57646 commit 74798e7

1 file changed: cuda_core/cuda/core/_memoryview.pyx (11 additions, 2 deletions)
@@ -36,6 +36,9 @@ from cuda.core._memory import Buffer
 # ---------------------------------------------------------------------------

 cdef object _tensor_bridge = None
+# Cache: type(obj) -> True/False for the torch tensor check.
+# Once a type is seen, we never re-check.
+cdef dict _torch_type_cache = {}
 # Tri-state: None = not checked, True/False = result of version check
 cdef object _torch_version_ok = None

@@ -58,9 +61,15 @@ cdef inline bint _torch_version_check():


 cdef inline bint _is_torch_tensor(object obj):
-    cdef str mod = type(obj).__module__ or ""
-    return mod.startswith("torch") and hasattr(obj, "data_ptr") \
+    cdef type tp = type(obj)
+    cdef object cached = _torch_type_cache.get(tp)
+    if cached is not None:
+        return <bint>cached
+    cdef str mod = tp.__module__ or ""
+    cdef bint result = mod.startswith("torch") and hasattr(obj, "data_ptr") \
         and _torch_version_check()
+    _torch_type_cache[tp] = result
+    return result


 cdef object _get_tensor_bridge():
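The speedup claim can be probed with a micro-benchmark along these lines (a sketch using hypothetical pure-Python stand-ins; absolute numbers depend on hardware and will not match the ~76 ns / ~186 ns figures quoted for the Cython code):

```python
import timeit

_cache = {}

def check_uncached(obj):
    # The full check: module prefix + attribute lookup on every call.
    mod = type(obj).__module__ or ""
    return mod.startswith("torch") and hasattr(obj, "data_ptr")

def check_cached(obj):
    # The memoized variant: one dict lookup after the first call per type.
    tp = type(obj)
    cached = _cache.get(tp)
    if cached is not None:
        return cached
    result = check_uncached(obj)
    _cache[tp] = result
    return result

obj = object()
check_cached(obj)  # warm the cache so timing measures only the fast path

t_full = min(timeit.repeat(lambda: check_uncached(obj), number=100_000, repeat=5))
t_cached = min(timeit.repeat(lambda: check_cached(obj), number=100_000, repeat=5))
print(f"uncached: {t_full:.4f}s  cached: {t_cached:.4f}s per 100k calls")
```

Taking the minimum over several repeats reduces timer noise; both paths must of course agree on the result for every object.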
