Skip to content

Commit 385e692

Browse files
authored
Merge pull request #140 from SharpAI/feature/rocm-gpu-detection
Feature/rocm gpu detection
2 parents a6798d1 + 28aede1 commit 385e692

File tree

6 files changed

+153
-17
lines changed

6 files changed

+153
-17
lines changed

skills/detection/yolo-detection-2026/deploy.sh

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,59 @@ fi
160160
log "Installing dependencies from $REQ_FILE ..."
161161
emit "{\"event\": \"progress\", \"stage\": \"install\", \"message\": \"Installing $BACKEND dependencies...\"}"
162162

163-
"$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2
163+
if [ "$BACKEND" = "rocm" ]; then
164+
# ROCm: detect installed version for correct PyTorch index URL
165+
ROCM_VER=""
166+
if [ -f /opt/rocm/.info/version ]; then
167+
ROCM_VER=$(head -1 /opt/rocm/.info/version | grep -oE '[0-9]+\.[0-9]+')
168+
elif command -v amd-smi &>/dev/null; then
169+
ROCM_VER=$(amd-smi version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1)
170+
elif command -v rocminfo &>/dev/null; then
171+
ROCM_VER=$(rocminfo 2>/dev/null | grep -i "HSA Runtime" | grep -oE '[0-9]+\.[0-9]+' | head -1)
172+
fi
173+
ROCM_VER="${ROCM_VER:-6.2}" # fallback if detection fails
174+
log "Detected ROCm version: $ROCM_VER"
175+
176+
# Build list of ROCm versions to try (detected → step down → previous major)
177+
ROCM_MAJOR=$(echo "$ROCM_VER" | cut -d. -f1)
178+
ROCM_MINOR=$(echo "$ROCM_VER" | cut -d. -f2)
179+
ROCM_CANDIDATES="$ROCM_VER"
180+
m=$((ROCM_MINOR - 1))
181+
while [ "$m" -ge 0 ]; do
182+
ROCM_CANDIDATES="$ROCM_CANDIDATES ${ROCM_MAJOR}.${m}"
183+
m=$((m - 1))
184+
done
185+
# Also try previous major version (e.g., 6.4 through 6.0 if on 7.x)
186+
prev_major=$((ROCM_MAJOR - 1))
187+
for pm in 4 3 2 1 0; do
188+
ROCM_CANDIDATES="$ROCM_CANDIDATES ${prev_major}.${pm}"
189+
done
190+
191+
# Phase 1: Try each candidate until PyTorch installs successfully
192+
TORCH_INSTALLED=false
193+
for ver in $ROCM_CANDIDATES; do
194+
log "Trying PyTorch for ROCm $ver ..."
195+
if "$PIP" install torch torchvision --index-url "https://download.pytorch.org/whl/rocm${ver}" -q 2>&1; then
196+
log "Installed PyTorch with ROCm $ver support"
197+
TORCH_INSTALLED=true
198+
break
199+
fi
200+
done
201+
202+
if [ "$TORCH_INSTALLED" = false ]; then
203+
log "WARNING: No PyTorch ROCm wheels found, installing CPU PyTorch from PyPI"
204+
"$PIP" install torch torchvision -q 2>&1 | tail -3 >&2
205+
fi
206+
207+
# Phase 2: remaining packages (ultralytics, onnxruntime-rocm, etc.)
208+
"$PIP" install ultralytics onnxruntime-rocm 'onnx>=1.12.0,<2.0.0' 'onnxslim>=0.1.71' \
209+
'numpy>=1.24.0' 'opencv-python-headless>=4.8.0' 'Pillow>=10.0.0' -q 2>&1 | tail -3 >&2
210+
211+
# Prevent ultralytics from auto-installing CPU onnxruntime during export
212+
export YOLO_AUTOINSTALL=0
213+
else
214+
"$PIP" install -r "$REQ_FILE" -q 2>&1 | tail -5 >&2
215+
fi
164216

165217
# ─── Step 5: Pre-convert model to optimized format ───────────────────────────
166218

skills/detection/yolo-detection-2026/requirements_rocm.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
# YOLO 2026 — ROCm (AMD GPU) requirements
2-
# Installs PyTorch with ROCm 6.2 support
3-
--extra-index-url https://download.pytorch.org/whl/rocm6.2
2+
# NOTE: deploy.sh auto-detects the installed ROCm version and installs
3+
# PyTorch from the matching index URL. This file is a reference manifest.
44
torch>=2.4.0
55
torchvision>=0.19.0
66
ultralytics>=8.3.0
77
onnxruntime-rocm>=1.18
8+
onnx>=1.12.0,<2.0.0 # pre-install: prevents ultralytics from auto-installing CPU onnxruntime
9+
onnxslim>=0.1.71 # pre-install: same reason
810
numpy>=1.24.0
911
opencv-python-headless>=4.8.0
1012
Pillow>=10.0.0

skills/detection/yolo-detection-2026/scripts/detect.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,16 @@
1515
"""
1616

1717
import sys
18+
import os
1819
import json
1920
import argparse
2021
import signal
2122
import time
2223
from pathlib import Path
2324

25+
# Prevent ultralytics from auto-installing packages (e.g. onnxruntime-gpu on ROCm)
26+
os.environ.setdefault("YOLO_AUTOINSTALL", "0")
27+
2428
# Import env_config — try multiple locations:
2529
# 1. Same directory as detect.py (bundled copy)
2630
# 2. DeepCamera repo: skills/lib/

skills/detection/yolo-detection-2026/scripts/env_config.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ class BackendSpec:
5151
),
5252
"rocm": BackendSpec(
5353
name="rocm",
54-
export_format="onnx",
55-
model_suffix=".onnx",
56-
half=False, # ONNX Runtime ROCm handles precision internally
54+
export_format="pytorch", # PyTorch + HIP — ultralytics ONNX doesn't support ROCMExecutionProvider
55+
model_suffix=".pt",
56+
half=False,
5757
),
5858
"mps": BackendSpec(
5959
name="mps",
@@ -165,7 +165,16 @@ def _try_rocm(self) -> bool:
165165
return False
166166

167167
self.backend = "rocm"
168-
self.device = "cuda" # ROCm exposes as CUDA in PyTorch
168+
# ROCm exposes as CUDA in PyTorch — but only if PyTorch-ROCm is installed
169+
try:
170+
import torch
171+
if torch.cuda.is_available():
172+
self.device = "cuda"
173+
else:
174+
self.device = "cpu"
175+
_log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback")
176+
except ImportError:
177+
self.device = "cpu"
169178

170179
# Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output)
171180
if has_amd_smi:
@@ -467,13 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
467476

468477
# Fallback: use the PT model we already loaded
469478
_log("Falling back to PyTorch model")
470-
pt_model.to(self.device)
479+
fallback_device = self.device
480+
if fallback_device == "cuda":
481+
try:
482+
import torch
483+
if not torch.cuda.is_available():
484+
fallback_device = "cpu"
485+
_log("torch.cuda not available, falling back to CPU")
486+
except ImportError:
487+
fallback_device = "cpu"
488+
pt_model.to(fallback_device)
489+
self.device = fallback_device
471490
self.load_ms = (time.perf_counter() - t0) * 1000
472491
return pt_model, "pytorch"
473492

474493
# No optimization requested or framework missing
475494
model = YOLO(f"{model_name}.pt")
476-
model.to(self.device)
495+
fallback_device = self.device
496+
if fallback_device == "cuda":
497+
try:
498+
import torch
499+
if not torch.cuda.is_available():
500+
fallback_device = "cpu"
501+
_log("torch.cuda not available, falling back to CPU")
502+
except ImportError:
503+
fallback_device = "cpu"
504+
model.to(fallback_device)
505+
self.device = fallback_device
477506
self.load_ms = (time.perf_counter() - t0) * 1000
478507
return model, "pytorch"
479508

skills/lib/env_config.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ class BackendSpec:
5151
),
5252
"rocm": BackendSpec(
5353
name="rocm",
54-
export_format="onnx",
55-
model_suffix=".onnx",
56-
half=False, # ONNX Runtime ROCm handles precision internally
54+
export_format="pytorch", # PyTorch + HIP — ultralytics ONNX doesn't support ROCMExecutionProvider
55+
model_suffix=".pt",
56+
half=False,
5757
),
5858
"mps": BackendSpec(
5959
name="mps",
@@ -165,7 +165,16 @@ def _try_rocm(self) -> bool:
165165
return False
166166

167167
self.backend = "rocm"
168-
self.device = "cuda" # ROCm exposes as CUDA in PyTorch
168+
# ROCm exposes as CUDA in PyTorch — but only if PyTorch-ROCm is installed
169+
try:
170+
import torch
171+
if torch.cuda.is_available():
172+
self.device = "cuda"
173+
else:
174+
self.device = "cpu"
175+
_log("PyTorch CUDA/ROCm not available, using CPU for PyTorch fallback")
176+
except ImportError:
177+
self.device = "cpu"
169178

170179
# Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output)
171180
if has_amd_smi:
@@ -467,13 +476,33 @@ def load_optimized(self, model_name: str, use_optimized: bool = True):
467476

468477
# Fallback: use the PT model we already loaded
469478
_log("Falling back to PyTorch model")
470-
pt_model.to(self.device)
479+
fallback_device = self.device
480+
if fallback_device == "cuda":
481+
try:
482+
import torch
483+
if not torch.cuda.is_available():
484+
fallback_device = "cpu"
485+
_log("torch.cuda not available, falling back to CPU")
486+
except ImportError:
487+
fallback_device = "cpu"
488+
pt_model.to(fallback_device)
489+
self.device = fallback_device
471490
self.load_ms = (time.perf_counter() - t0) * 1000
472491
return pt_model, "pytorch"
473492

474493
# No optimization requested or framework missing
475494
model = YOLO(f"{model_name}.pt")
476-
model.to(self.device)
495+
fallback_device = self.device
496+
if fallback_device == "cuda":
497+
try:
498+
import torch
499+
if not torch.cuda.is_available():
500+
fallback_device = "cpu"
501+
_log("torch.cuda not available, falling back to CPU")
502+
except ImportError:
503+
fallback_device = "cpu"
504+
model.to(fallback_device)
505+
self.device = fallback_device
477506
self.load_ms = (time.perf_counter() - t0) * 1000
478507
return model, "pytorch"
479508

skills/lib/test_env_config_rocm.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,11 @@ def test_dual_gpu_picks_discrete(self, mock_run, _mock_dir):
9393
"""With 2 GPUs, picks the R9700 (32 GB) over iGPU (2 GB)."""
9494
mock_run.return_value = _make_run_result(AMD_SMI_DUAL_GPU)
9595

96-
env = HardwareEnv()
97-
result = env._try_rocm()
96+
mock_torch = mock.MagicMock()
97+
mock_torch.cuda.is_available.return_value = True
98+
with mock.patch.dict("sys.modules", {"torch": mock_torch}):
99+
env = HardwareEnv()
100+
result = env._try_rocm()
98101

99102
assert result is True
100103
assert env.backend == "rocm"
@@ -170,6 +173,23 @@ def test_amd_smi_failure_returns_true_with_defaults(self, mock_run, _mock_dir):
170173
assert env.backend == "rocm"
171174
assert env.gpu_name == "" # No name parsed, but backend detected
172175

176+
@mock.patch("env_config.shutil.which", _mock_which({"amd-smi"}))
177+
@mock.patch("env_config.Path.is_dir", return_value=False)
178+
@mock.patch("env_config.subprocess.run")
179+
def test_no_pytorch_rocm_falls_back_to_cpu_device(self, mock_run, _mock_dir):
180+
"""When torch.cuda.is_available() is False, device stays 'cpu'."""
181+
mock_run.return_value = _make_run_result(AMD_SMI_SINGLE_GPU)
182+
183+
mock_torch = mock.MagicMock()
184+
mock_torch.cuda.is_available.return_value = False
185+
with mock.patch.dict("sys.modules", {"torch": mock_torch}):
186+
env = HardwareEnv()
187+
env._try_rocm()
188+
189+
assert env.backend == "rocm"
190+
assert env.device == "cpu" # No PyTorch-ROCm → CPU fallback
191+
assert env.gpu_name == "AMD Radeon RX 7900 XTX" # GPU still detected
192+
173193

174194
class TestTryRocmFallback:
175195
"""rocm-smi fallback (amd-smi not available)."""

0 commit comments

Comments
 (0)