Skip to content

Commit ae2f67a

Browse files
Add H200 and B200 support for Crusoe provider (#216)
1 parent 98d09c8 commit ae2f67a

3 files changed

Lines changed: 31 additions & 7 deletions

File tree

src/gpuhunt/_internal/constraints.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ def is_nvidia_superchip(gpu_name: str) -> bool:
177177
NvidiaGPUInfo(name="H200", memory=141, compute_capability=(9, 0)),
178178
NvidiaGPUInfo(name="H200NVL", memory=141, compute_capability=(9, 0)),
179179
NvidiaGPUInfo(name="B200", memory=180, compute_capability=(10, 0)),
180+
NvidiaGPUInfo(name="GB200", memory=186, compute_capability=(10, 0)),
180181
NvidiaGPUInfo(name="L4", memory=24, compute_capability=(8, 9)),
181182
NvidiaGPUInfo(name="L40", memory=48, compute_capability=(8, 9)),
182183
NvidiaGPUInfo(name="L40S", memory=48, compute_capability=(8, 9)),
@@ -253,6 +254,12 @@ def is_nvidia_superchip(gpu_name: str) -> bool:
253254
architecture=AMDArchitecture.CDNA3,
254255
device_ids=(0x74A5,),
255256
),
257+
AMDGPUInfo(
258+
name="MI355X",
259+
memory=288,
260+
architecture=AMDArchitecture.CDNA4,
261+
device_ids=(0x75A3,),
262+
),
256263
]
257264

258265
# One TPUInfo per entry in _TPU_VERSIONS; every TPU is registered with memory=0.
KNOWN_TPUS: list[TPUInfo] = [TPUInfo(name=version, memory=0) for version in _TPU_VERSIONS]

src/gpuhunt/_internal/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class AMDArchitecture(enum.Enum):
3232
CDNA = "CDNA"
3333
CDNA2 = "CDNA2"
3434
CDNA3 = "CDNA3"
35+
CDNA4 = "CDNA4"
3536

3637
@classmethod
3738
def cast(cls, value: Union["AMDArchitecture", str]) -> "AMDArchitecture":

src/gpuhunt/providers/crusoe.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@
1010

1111
import requests
1212

13-
from gpuhunt._internal.models import AcceleratorVendor, QueryFilter, RawCatalogItem
13+
from gpuhunt._internal.models import (
14+
AcceleratorVendor,
15+
CPUArchitecture,
16+
QueryFilter,
17+
RawCatalogItem,
18+
)
1419
from gpuhunt.providers import AbstractProvider
1520

1621
logger = logging.getLogger(__name__)
@@ -26,15 +31,13 @@
2631
"A100-PCIe-80GB": ("A100", AcceleratorVendor.NVIDIA, 80),
2732
"A100-SXM-80GB": ("A100", AcceleratorVendor.NVIDIA, 80),
2833
"H100-SXM-80GB": ("H100", AcceleratorVendor.NVIDIA, 80),
34+
"H200-SXM-141GB": ("H200", AcceleratorVendor.NVIDIA, 141),
35+
"B200-SXM-180GB": ("B200", AcceleratorVendor.NVIDIA, 180),
36+
"GB200-NVL-186GB": ("GB200", AcceleratorVendor.NVIDIA, 186),
2937
"L40S-48GB": ("L40S", AcceleratorVendor.NVIDIA, 48),
3038
"A40-PCIe-48GB": ("A40", AcceleratorVendor.NVIDIA, 48),
3139
"MI300X-192GB": ("MI300X", AcceleratorVendor.AMD, 192),
32-
# TODO: The following GPUs are listed on https://crusoe.ai/cloud/pricing but not yet
33-
# returned by the instance types API. Add them once Crusoe exposes them:
34-
# - H200 141GB ($4.29/GPU-hr on-demand, spot: contact sales)
35-
# - GB200 186GB (contact sales)
36-
# - B200 180GB (contact sales)
37-
# - MI355X 288GB ($3.45 listed but not confirmed; also missing from KNOWN_AMD_GPUS)
40+
"MI355X_288GB": ("MI355X", AcceleratorVendor.AMD, 288),
3841
}
3942

4043
# Per-GPU-hour pricing from https://crusoe.ai/cloud/pricing
@@ -44,6 +47,10 @@
4447
"A100-PCIe-80GB": (1.65, 1.20),
4548
"A100-SXM-80GB": (1.95, 1.30),
4649
"H100-SXM-80GB": (3.90, 1.60),
50+
"H200-SXM-141GB": (4.29, None),
51+
# TODO: The B200 price is an estimate derived from the B200/H100 price ratio on other providers; update once Crusoe publishes rates.
52+
# GB200 and MI355X pricing not known yet; update once Crusoe publishes rates.
53+
"B200-SXM-180GB": (7.25, None),
4754
"L40S-48GB": (1.00, 0.50),
4855
"A40-PCIe-48GB": (0.90, 0.40),
4956
"MI300X-192GB": (3.45, 0.95),
@@ -137,6 +144,13 @@ def _request(self, method: str, path: str, params: Optional[dict] = None) -> req
137144
return requests.request(method, url, headers=headers, params=params, timeout=TIMEOUT)
138145

139146

147+
def _get_cpu_arch(spec: dict) -> str:
    """Return the CPU architecture value for an instance-type spec.

    Args:
        spec: Instance-type spec dict; only its ``"cpu_type"`` key is read.

    Returns:
        ``CPUArchitecture.ARM.value`` when ``spec["cpu_type"]`` is exactly
        ``"arm64"``; ``CPUArchitecture.X86.value`` for any other value,
        including a missing key.
    """
    is_arm = spec.get("cpu_type", "") == "arm64"
    arch = CPUArchitecture.ARM if is_arm else CPUArchitecture.X86
    return arch.value
152+
153+
140154
def _get_available_type_locations(capacities: list[dict]) -> dict[str, list[str]]:
141155
best_qty: dict[tuple[str, str], int] = defaultdict(int)
142156
for cap in capacities:
@@ -191,6 +205,7 @@ def _make_gpu_items(
191205
gpu_memory=gpu_memory,
192206
spot=None,
193207
disk_size=float(spec["disk_gb"]) if spec.get("disk_gb") else None,
208+
cpu_arch=_get_cpu_arch(spec),
194209
# disk_gb: ephemeral NVMe size in GB (0 = no ephemeral disk).
195210
# Used by dstack to decide whether to create a persistent data disk.
196211
provider_data={"disk_gb": spec.get("disk_gb", 0)},
@@ -231,6 +246,7 @@ def _make_cpu_items(product_name: str, spec: dict, locations: list[str]) -> list
231246
gpu_memory=None,
232247
spot=False,
233248
disk_size=float(spec["disk_gb"]) if spec.get("disk_gb") else None,
249+
cpu_arch=_get_cpu_arch(spec),
234250
provider_data={"disk_gb": spec.get("disk_gb", 0)},
235251
)
236252

0 commit comments

Comments
 (0)