Support A4 instances with the B200 GPU on GCP (#3100)

jvstme · web-flow · commit 9c51df815dc7 · 2025-09-29T09:28:27.000Z
This implementation allows provisioning both individual A4 instances and clusters. Clusters do not yet support high-speed networking, because high-speed networking requires a [different network setup](https://cloud.google.com/ai-hypercomputer/docs/create/create-vm#setup-network).
diff --git a/src/dstack/_internal/core/backends/base/offers.py b/src/dstack/_internal/core/backends/base/offers.py
@@ -22,6 +22,7 @@
 SUPPORTED_GPUHUNT_FLAGS = [
     "oci-spot",
     "lambda-arm",
+    "gcp-a4",
 ]
 
 
diff --git a/src/dstack/_internal/core/backends/gcp/compute.py b/src/dstack/_internal/core/backends/gcp/compute.py
@@ -867,8 +867,8 @@ def _has_gpu_quota(quotas: Dict[str, float], resources: Resources) -> bool:
     gpu = resources.gpus[0]
     if _is_tpu(gpu.name):
         return True
-    if gpu.name == "H100":
-        # H100 and H100_MEGA quotas are not returned by `regions_client.list`
+    if gpu.name in ["B200", "H100"]:
+        # B200, H100 and H100_MEGA quotas are not returned by `regions_client.list`
         return True
     quota_name = f"NVIDIA_{gpu.name}_GPUS"
     if gpu.name == "A100" and gpu.memory_mib == 80 * 1024:
diff --git a/src/dstack/_internal/core/backends/gcp/resources.py b/src/dstack/_internal/core/backends/gcp/resources.py
@@ -19,6 +19,7 @@
 DSTACK_GATEWAY_TAG = "dstack-gateway-instance"
 
 supported_accelerators = [
+    {"accelerator_name": "nvidia-b200", "gpu_name": "B200", "memory_mb": 1024 * 180},
     {"accelerator_name": "nvidia-a100-80gb", "gpu_name": "A100", "memory_mb": 1024 * 80},
     {"accelerator_name": "nvidia-tesla-a100", "gpu_name": "A100", "memory_mb": 1024 * 40},
     {"accelerator_name": "nvidia-l4", "gpu_name": "L4", "memory_mb": 1024 * 24},
@@ -476,5 +477,6 @@ def instance_type_supports_persistent_disk(instance_type_name: str) -> bool:
             "n4-",
             "h3-",
             "v6e",
+            "a4-",
         ]
     )
diff --git a/src/dstack/_internal/server/background/tasks/common.py b/src/dstack/_internal/server/background/tasks/common.py
@@ -19,4 +19,6 @@ def get_provisioning_timeout(backend_type: BackendType, instance_type_name: str)
         return timedelta(minutes=20)
     if backend_type == BackendType.VULTR and instance_type_name.startswith("vbm"):
         return timedelta(minutes=55)
+    if backend_type == BackendType.GCP and instance_type_name == "a4-highgpu-8g":
+        return timedelta(minutes=16)
     return timedelta(minutes=10)

Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,7 @@`
`22`	`22`	`SUPPORTED_GPUHUNT_FLAGS = [`
`23`	`23`	`"oci-spot",`
`24`	`24`	`"lambda-arm",`
	`25`	`+ "gcp-a4",`
`25`	`26`	`]`
`26`	`27`
`27`	`28`
Original file line number	Diff line number	Diff line change
`@@ -19,6 +19,7 @@`
`19`	`19`	`DSTACK_GATEWAY_TAG = "dstack-gateway-instance"`
`20`	`20`
`21`	`21`	`supported_accelerators = [`
	`22`	`+ {"accelerator_name": "nvidia-b200", "gpu_name": "B200", "memory_mb": 1024 * 180},`
`22`	`23`	`{"accelerator_name": "nvidia-a100-80gb", "gpu_name": "A100", "memory_mb": 1024 * 80},`
`23`	`24`	`{"accelerator_name": "nvidia-tesla-a100", "gpu_name": "A100", "memory_mb": 1024 * 40},`
`24`	`25`	`{"accelerator_name": "nvidia-l4", "gpu_name": "L4", "memory_mb": 1024 * 24},`
`@@ -476,5 +477,6 @@ def instance_type_supports_persistent_disk(instance_type_name: str) -> bool:`
`476`	`477`	`"n4-",`
`477`	`478`	`"h3-",`
`478`	`479`	`"v6e",`
	`480`	`+ "a4-",`
`479`	`481`	`]`
`480`	`482`	`)`