Skip to content

Commit b5ec10d

Browse files
leofang authored and claude committed
Add strided layout guard to tensor bridge, reject sparse tensors
Check aoti_torch_get_layout() before extracting metadata — reject non-strided tensors (sparse, mkldnn, etc.) whose shape/strides are not meaningful for dense memory access. We intentionally skip the other Python-level __dlpack__ guards (requires_grad, is_conj, is_neg, wrong-device) for the same reason PyTorch's own __dlpack_c_exchange_api__ C path skips them: the C-level exchange path is designed for performance-critical consumers. PyTorch's DLTensorFromPyObjectNoSync → toDLPackNonOwning performs zero safety checks (see aten/src/ATen/DLConvertor.cpp). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 615f984 commit b5ec10d

3 files changed

Lines changed: 29 additions & 0 deletions

File tree

cuda_core/cuda/core/_include/aoti_shim.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,6 @@ EXPORTS
2626
aoti_torch_get_device_index
2727
aoti_torch_device_type_cpu
2828
aoti_torch_device_type_cuda
29+
aoti_torch_get_layout
30+
aoti_torch_layout_strided
2931
aoti_torch_get_current_cuda_stream

cuda_core/cuda/core/_include/aoti_shim.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,13 @@ AOTI_SHIM_API AOTITorchError aoti_torch_get_device_index(
9494
AOTI_SHIM_API int32_t aoti_torch_device_type_cpu(void);
9595
AOTI_SHIM_API int32_t aoti_torch_device_type_cuda(void);
9696

97+
/* ---- layout -------------------------------------------------------------- */
98+
99+
AOTI_SHIM_API AOTITorchError aoti_torch_get_layout(
100+
AtenTensorHandle tensor, int32_t* ret_layout);
101+
102+
AOTI_SHIM_API int32_t aoti_torch_layout_strided(void);
103+
97104
/* ---- stream -------------------------------------------------------------- */
98105

99106
AOTI_SHIM_API AOTITorchError aoti_torch_get_current_cuda_stream(

cuda_core/cuda/core/_tensor_bridge.pyx

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ cdef extern from "_include/aoti_shim.h":
100100
int32_t aoti_torch_device_type_cpu()
101101
int32_t aoti_torch_device_type_cuda()
102102

103+
# layout
104+
AOTITorchError aoti_torch_get_layout(AtenTensorHandle, int32_t*)
105+
int32_t aoti_torch_layout_strided()
106+
103107
# stream
104108
AOTITorchError aoti_torch_get_current_cuda_stream(int32_t, void**)
105109

@@ -115,6 +119,7 @@ import sys
115119

116120
cdef int32_t _DEVICE_TYPE_CPU = aoti_torch_device_type_cpu()
117121
cdef int32_t _DEVICE_TYPE_CUDA = aoti_torch_device_type_cuda()
122+
cdef int32_t _LAYOUT_STRIDED = aoti_torch_layout_strided()
118123
cdef dict _aoti_dtype_map = None
119124
cdef dict _aoti_itemsize_map = None
120125

@@ -310,11 +315,26 @@ def view_as_torch_tensor(object obj, object stream_ptr, view=None):
310315
cdef int64_t* strides_ptr
311316
cdef int32_t dtype_code
312317
cdef int32_t device_type, device_index
318+
cdef int32_t tensor_layout
313319
cdef StridedMemoryView buf
314320
cdef int itemsize
315321
cdef intptr_t _stream_ptr_int
316322
cdef _StridedLayout layout
317323

324+
# Reject non-strided (sparse, mkldnn, etc.) tensors whose shape/strides
325+
# are not meaningful for dense memory access. This mirrors the guard in
326+
# PyTorch's Python-level __dlpack__ ("layout other than torch.strided").
327+
# Note: we intentionally skip the other Python-level guards
328+
# (requires_grad, is_conj, is_neg, wrong-device) for the same reason
329+
# PyTorch's own __dlpack_c_exchange_api__ C path skips them — the C-level
330+
# exchange path is designed for performance-critical consumers.
331+
check_aoti(aoti_torch_get_layout(handle, &tensor_layout),
332+
b"aoti_torch_get_layout")
333+
if tensor_layout != _LAYOUT_STRIDED:
334+
raise BufferError(
335+
"Only strided tensors can be viewed via the tensor bridge "
336+
"(use tensor.to_dense() to convert sparse tensors first)")
337+
318338
check_aoti(aoti_torch_get_data_ptr(handle, &data_ptr),
319339
b"aoti_torch_get_data_ptr")
320340
check_aoti(aoti_torch_get_dim(handle, &ndim),

0 commit comments

Comments (0)