diff --git a/backend/backends/base.py b/backend/backends/base.py index c566af10..487b59d8 100644 --- a/backend/backends/base.py +++ b/backend/backends/base.py @@ -151,13 +151,24 @@ def check_cuda_compatibility() -> tuple[bool, str | None]: # Check for both sm_XX and compute_XX (JIT-compiled) entries compute_tag = f"compute_{major}{minor}" if sm_tag not in arch_list and compute_tag not in arch_list: - return False, ( - f"{device_name} (compute capability {capability} / {sm_tag}) " - f"is not supported by this PyTorch build. " - f"Supported architectures: {', '.join(arch_list)}. " - f"Install PyTorch nightly (cu128) for newer GPU support: " - f"pip install torch --index-url https://download.pytorch.org/whl/nightly/cu128" - ) + # Blackwell (sm_120+) requires the cu128 binary; give a + # clear, actionable message that names the re-download path. + if major >= 12: + msg = ( + f"Your GPU ({device_name}, {sm_tag}) requires the " + f"Blackwell CUDA binary. " + f"Go to Settings → Server → GPU Acceleration " + f"to re-download the correct binary." + ) + else: + msg = ( + f"{device_name} (compute capability {capability} / {sm_tag}) " + f"is not supported by this PyTorch build. " + f"Supported architectures: {', '.join(arch_list)}. " + f"Go to Settings → Server → GPU Acceleration " + f"to download a compatible CUDA binary." 
+ ) + return False, msg except AttributeError: pass diff --git a/backend/models.py b/backend/models.py index 06f321ac..cbcd696e 100644 --- a/backend/models.py +++ b/backend/models.py @@ -474,6 +474,7 @@ class ModelStatus(BaseModel): downloading: bool = False # True if download is in progress size_mb: Optional[float] = None loaded: bool = False + cuda_arch_warning: Optional[str] = None # Set when GPU arch mismatches the CUDA binary class ModelStatusListResponse(BaseModel): diff --git a/backend/routes/models.py b/backend/routes/models.py index 7cbb7b04..2ada4791 100644 --- a/backend/routes/models.py +++ b/backend/routes/models.py @@ -242,6 +242,18 @@ async def get_model_status(): from ..backends import get_all_model_configs, check_model_loaded + # Check once — a CUDA arch mismatch affects every model on this machine. + cuda_arch_warning: str | None = None + try: + import torch + + if torch.cuda.is_available(): + from ..backends.base import check_cuda_compatibility + + _ok, cuda_arch_warning = check_cuda_compatibility() + except Exception: + pass + registry_configs = get_all_model_configs() model_configs = [ { @@ -359,6 +371,7 @@ async def get_model_status(): downloading=is_downloading, size_mb=size_mb, loaded=loaded, + cuda_arch_warning=cuda_arch_warning, ) ) except Exception: @@ -378,6 +391,7 @@ async def get_model_status(): downloading=is_downloading, size_mb=None, loaded=loaded, + cuda_arch_warning=cuda_arch_warning, ) ) diff --git a/backend/utils/platform_detect.py b/backend/utils/platform_detect.py index 1ec2980a..236f725a 100644 --- a/backend/utils/platform_detect.py +++ b/backend/utils/platform_detect.py @@ -3,19 +3,37 @@ """ import platform -from typing import Literal +from typing import Literal, Optional def is_apple_silicon() -> bool: """ Check if running on Apple Silicon (arm64 macOS). 
- + Returns: True if on Apple Silicon, False otherwise """ return platform.system() == "Darwin" and platform.machine() == "arm64" +def get_cuda_arch() -> Optional[str]: + """Return the SM architecture string for the primary CUDA GPU, or None. + + Examples: ``"sm_89"`` for an RTX 4090, ``"sm_120"`` for an RTX 5090 + (Blackwell). Returns ``None`` when no CUDA GPU is present or torch is + not installed. + """ + try: + import torch + + if not torch.cuda.is_available(): + return None + major, minor = torch.cuda.get_device_capability(0) + return f"sm_{major}{minor}" + except Exception: + return None + + def get_backend_type() -> Literal["mlx", "pytorch"]: """ Detect the best backend for the current platform.