Skip to content

Commit 385e692

Browse files
authored
Merge pull request #140 from SharpAI/feature/rocm-gpu-detection
Feature/rocm gpu detection
2 parents a6798d1 + 28aede1 commit 385e692

File tree

6 files changed

+153
-17
lines changed

6 files changed

+153
-17
lines changed

skills/detection/yolo-detection-2026/deploy.sh

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,59 @@ fi
160160
log "Installing dependencies from $REQ_FILE ..."
161161
emit "{\"event\": \"progress\", \"stage\": \"install\", \"message\": \"Installing $BACKEND dependencies...\"}"
162162

163-
"$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2
163+
if [ "$BACKEND" = "rocm" ]; then
164+
# ROCm: detect installed version for correct PyTorch index URL
165+
ROCM_VER=""
166+
if [ -f /opt/rocm/.info/version ]; then
167+
ROCM_VER=$(head -1 /opt/rocm/.info/version | grep -oE '[0-9]+\.[0-9]+')
168+
elif command -v amd-smi &>/dev/null; then
169+
ROCM_VER=$(amd-smi version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1)
170+
elif command -v rocminfo &>/dev/null; then
171+
ROCM_VER=$(rocminfo 2>/dev/null | grep -i "HSA Runtime" | grep -oE '[0-9]+\.[0-9]+' | head -1)
172+
fi
173+
ROCM_VER="${ROCM_VER:-6.2}" # fallback if detection fails
174+
log "Detected ROCm version: $ROCM_VER"
175+
176+
# Build list of ROCm versions to try (detected → step down → previous major)
177+
ROCM_MAJOR=$(echo "$ROCM_VER" | cut -d. -f1)
178+
ROCM_MINOR=$(echo "$ROCM_VER" | cut -d. -f2)
179+
ROCM_CANDIDATES="$ROCM_VER"
180+
m=$((ROCM_MINOR - 1))
181+
while [ "$m" -ge 0 ]; do
182+
ROCM_CANDIDATES="$ROCM_CANDIDATES ${ROCM_MAJOR}.${m}"
183+
m=$((m - 1))
184+
done
185+
# Also try previous major version (e.g., 6.4 through 6.0 if on 7.x)
186+
prev_major=$((ROCM_MAJOR - 1))
187+
for pm in 4 3 2 1 0; do
188+
ROCM_CANDIDATES="$ROCM_CANDIDATES ${prev_major}.${pm}"
189+
done
190+
191+
# Phase 1: Try each candidate until PyTorch installs successfully
192+
TORCH_INSTALLED=false
193+
for ver in $ROCM_CANDIDATES; do
194+
log "Trying PyTorch for ROCm $ver ..."
195+
if "$PIP" install torch torchvision --index-url "https://download.pytorch.org/whl/rocm${ver}" -q 2>&1; then
196+
log "Installed PyTorch with ROCm $ver support"
197+
TORCH_INSTALLED=true
198+
break
199+
fi
200+
done
201+
202+
if [ "$TORCH_INSTALLED" = false ]; then
203+
log "WARNING: No PyTorch ROCm wheels found, installing CPU PyTorch from PyPI"
204+
"$PIP" install torch torchvision -q 2>&1 | tail -3 >&2
205+
fi
206+
207+
# Phase 2: remaining packages (ultralytics, onnxruntime-rocm, etc.)
208+
"$PIP" install ultralytics onnxruntime-rocm 'onnx>=1.12.0,<2.0.0' 'onnxslim>=0.1.71' \
209+
'numpy>=1.24.0' 'opencv-python-headless>=4.8.0' 'Pillow>=10.0.0' -q 2>&1 | tail -3 >&2
210+
211+
# Prevent ultralytics from auto-installing CPU onnxruntime during export
212+
export YOLO_AUTOINSTALL=0
213+
else
214+
"$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2
215+
fi
164216

165217
# ─── Step 5: Pre-convert model to optimized format ───────────────────────────
166218

skills/detection/yolo-detection-2026/requirements_rocm.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
# YOLO 2026 — ROCm (AMD GPU) requirements
2-
# Installs PyTorch with ROCm 6.2 support
3-
--extra-index-url https://download.pytorch.org/whl/rocm6.2
2+
# NOTE: deploy.sh auto-detects the installed ROCm version and installs
3+
# PyTorch from the matching index URL. This file is a reference manifest.
44
torch>=2.4.0
55
torchvision>=0.19.0
66
ultralytics>=8.3.0
77
onnxruntime-rocm>=1.18
8+
onnx>=1.12.0,<2.0.0 # pre-install: prevents ultralytics from auto-installing CPU onnxruntime
9+
onnxslim>=0.1.71 # pre-install: same reason
810
numpy>=1.24.0
911
opencv-python-headless>=4.8.0
1012
Pillow>=10.0.0

skills/detection/yolo-detection-2026/scripts/detect.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,16 @@
1515
"""
1616

1717
import sys
18+
import os
1819
import json
1920
import argparse
2021
import signal
2122
import time
2223
from pathlib import Path
2324

25+
# Prevent ultralytics from auto-installing packages (e.g. onnxruntime-gpu on ROCm)
26+
os.environ.setdefault("YOLO_AUTOINSTALL", "0")
27+
2428
# Import env_config — try multiple locations:
2529
# 1. Same directory as detect.py (bundled copy)
2630
# 2. DeepCamera repo: skills/lib/

skills/detection/yolo-detection-2026/scripts/env_config.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ class BackendSpec:
5151
),
5252
"rocm": BackendSpec(
5353
name="rocm",
54-
export_format="onnx",
55-
model_suffix=".onnx",
56-
half=False, # ONNX Runtime ROCm handles precision internally
54+
export_format="pytorch", # PyTorch + HIP — ultralytics ONNX doesn't support ROCMExecutionProvider
55+
model_suffix=".pt",
56+
half=False,
5757
),
5858
"mps": BackendSpec(
5959
name="mps",
@@ -165,7 +165,16 @@ def _try_rocm(self) -> bool:
165165
return False
166166

167167
self.backend = "rocm"
168-
self.device = "cuda" # ROCm exposes as CUDA in PyTorch
168+
# ROCm exposes as CUDA in PyTorch — but only if PyTorch-ROCm is installed
169+
try:
170+
import torch
171+
if torch.cuda.is_available():
172+
self.device = "cuda"
173+
else:
174+
self.device = "cpu"
175+
_log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback")
176+
except ImportError:
177+
self.device = "cpu"
169178

170179
# Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output)
171180
if has_amd_smi:
@@ -467,13 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
467476

468477
# Fallback: use the PT model we already loaded
469478
_log("Falling back to PyTorch model")
470-
pt_model.to(self.device)
479+
fallback_device = self.device
480+
if fallback_device == "cuda":
481+
try:
482+
import torch
483+
if not torch.cuda.is_available():
484+
fallback_device = "cpu"
485+
_log("torch.cuda not available, falling back to CPU")
486+
except ImportError:
487+
fallback_device = "cpu"
488+
pt_model.to(fallback_device)
489+
self.device = fallback_device
471490
self.load_ms = (time.perf_counter() - t0) * 1000
472491
return pt_model, "pytorch"
473492

474493
# No optimization requested or framework missing
475494
model = YOLO(f"{model_name}.pt")
476-
model.to(self.device)
495+
fallback_device = self.device
496+
if fallback_device == "cuda":
497+
try:
498+
import torch
499+
if not torch.cuda.is_available():
500+
fallback_device = "cpu"
501+
_log("torch.cuda not available, falling back to CPU")
502+
except ImportError:
503+
fallback_device = "cpu"
504+
model.to(fallback_device)
505+
self.device = fallback_device
477506
self.load_ms = (time.perf_counter() - t0) * 1000
478507
return model, "pytorch"
479508

skills/lib/env_config.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ class BackendSpec:
5151
),
5252
"rocm": BackendSpec(
5353
name="rocm",
54-
export_format="onnx",
55-
model_suffix=".onnx",
56-
half=False, # ONNX Runtime ROCm handles precision internally
54+
export_format="pytorch", # PyTorch + HIP — ultralytics ONNX doesn't support ROCMExecutionProvider
55+
model_suffix=".pt",
56+
half=False,
5757
),
5858
"mps": BackendSpec(
5959
name="mps",
@@ -165,7 +165,16 @@ def _try_rocm(self) -> bool:
165165
return False
166166

167167
self.backend = "rocm"
168-
self.device = "cuda" # ROCm exposes as CUDA in PyTorch
168+
# ROCm exposes as CUDA in PyTorch — but only if PyTorch-ROCm is installed
169+
try:
170+
import torch
171+
if torch.cuda.is_available():
172+
self.device = "cuda"
173+
else:
174+
self.device = "cpu"
175+
_log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback")
176+
except ImportError:
177+
self.device = "cpu"
169178

170179
# Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output)
171180
if has_amd_smi:
@@ -467,13 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
467476

468477
# Fallback: use the PT model we already loaded
469478
_log("Falling back to PyTorch model")
470-
pt_model.to(self.device)
479+
fallback_device = self.device
480+
if fallback_device == "cuda":
481+
try:
482+
import torch
483+
if not torch.cuda.is_available():
484+
fallback_device = "cpu"
485+
_log("torch.cuda not available, falling back to CPU")
486+
except ImportError:
487+
fallback_device = "cpu"
488+
pt_model.to(fallback_device)
489+
self.device = fallback_device
471490
self.load_ms = (time.perf_counter() - t0) * 1000
472491
return pt_model, "pytorch"
473492

474493
# No optimization requested or framework missing
475494
model = YOLO(f"{model_name}.pt")
476-
model.to(self.device)
495+
fallback_device = self.device
496+
if fallback_device == "cuda":
497+
try:
498+
import torch
499+
if not torch.cuda.is_available():
500+
fallback_device = "cpu"
501+
_log("torch.cuda not available, falling back to CPU")
502+
except ImportError:
503+
fallback_device = "cpu"
504+
model.to(fallback_device)
505+
self.device = fallback_device
477506
self.load_ms = (time.perf_counter() - t0) * 1000
478507
return model, "pytorch"
479508

skills/lib/test_env_config_rocm.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,11 @@ def test_dual_gpu_picks_discrete(self, mock_run, _mock_dir):
9393
"""With 2 GPUs, picks the R9700 (32 GB) over iGPU (2 GB)."""
9494
mock_run.return_value = _make_run_result(AMD_SMI_DUAL_GPU)
9595

96-
env = HardwareEnv()
97-
result = env._try_rocm()
96+
mock_torch = mock.MagicMock()
97+
mock_torch.cuda.is_available.return_value = True
98+
with mock.patch.dict("sys.modules", {"torch": mock_torch}):
99+
env = HardwareEnv()
100+
result = env._try_rocm()
98101

99102
assert result is True
100103
assert env.backend == "rocm"
@@ -170,6 +173,23 @@ def test_amd_smi_failure_returns_true_with_defaults(self, mock_run, _mock_dir):
170173
assert env.backend == "rocm"
171174
assert env.gpu_name == "" # No name parsed, but backend detected
172175

176+
@mock.patch("env_config.shutil.which", _mock_which({"amd-smi"}))
177+
@mock.patch("env_config.Path.is_dir", return_value=False)
178+
@mock.patch("env_config.subprocess.run")
179+
def test_no_pytorch_rocm_falls_back_to_cpu_device(self, mock_run, _mock_dir):
180+
"""When torch.cuda.is_available() is False, device stays 'cpu'."""
181+
mock_run.return_value = _make_run_result(AMD_SMI_SINGLE_GPU)
182+
183+
mock_torch = mock.MagicMock()
184+
mock_torch.cuda.is_available.return_value = False
185+
with mock.patch.dict("sys.modules", {"torch": mock_torch}):
186+
env = HardwareEnv()
187+
env._try_rocm()
188+
189+
assert env.backend == "rocm"
190+
assert env.device == "cpu" # No PyTorch-ROCm → CPU fallback
191+
assert env.gpu_name == "AMD Radeon RX 7900 XTX" # GPU still detected
192+
173193

174194
class TestTryRocmFallback:
175195
"""rocm-smi fallback (amd-smi not available)."""

0 commit comments

Comments
 (0)