Prevent get_device_capacity LRU cache from extending torch.Tensor lifetimes (Dao-AILab#102)

thakkarV · web-flow · commit a7f7b41e11ff · 2026-04-08T14:07:58.000-04:00
diff --git a/quack/cute_dsl_utils.py b/quack/cute_dsl_utils.py
@@ -77,7 +77,7 @@ def _parse_arch_str(arch_str: str) -> Tuple[int, int]:
 
 
 @lru_cache
-def get_device_capacity(device: torch.device = None) -> Tuple[int, int]:
+def _get_device_capacity_cached(device: torch.device = None) -> Tuple[int, int]:
     """Return (major, minor) device capability.
 
     Override with QUACK_ARCH (e.g. 'sm_90' or '90') for CPU-only compilation
@@ -89,6 +89,23 @@ def get_device_capacity(device: torch.device = None) -> Tuple[int, int]:
     return torch.cuda.get_device_capability(device)
 
 
+def get_device_capacity(
+    device: torch.device | torch.Tensor | None = None,
+) -> Tuple[int, int]:
+    """Return (major, minor) device capability.
+
+    Override with QUACK_ARCH (e.g. 'sm_90' or '90') for CPU-only compilation
+    without a GPU present.
+
+    Accepts a ``torch.device``, a tensor, or ``None``. A tensor is replaced by
+    its ``.device`` before the cached helper is consulted, so the LRU cache key
+    never holds a reference to the tensor; other values are passed through.
+    """
+    if isinstance(device, torch.Tensor):
+        device = device.device
+    return _get_device_capacity_cached(device)
+
+
 def _partition_fields(obj):
     """Split dataclass fields into (constexpr_dict, non_constexpr_dict) by type."""
     all_fields = {field.name: getattr(obj, field.name) for field in fields(obj)}