Skip to content

Commit cecadfd

Browse files
author
Ruslan Gainutdinov
committed
fix: return cluster shared volumes
1 parent 470a575 commit cecadfd

File tree

2 files changed

+37
-12
lines changed

2 files changed

+37
-12
lines changed

examples/clusters_example.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,23 +29,27 @@ def create_cluster_example():
2929
# Get SSH keys
3030
ssh_keys = [key.id for key in verda.ssh_keys.get()]
3131

32+
cluster_type = '16B200'
33+
cluster_image = 'ubuntu-24.04-cuda-13.0-cluster'
34+
location_code=Locations.FIN_03
35+
3236
# Check if cluster type is available
33-
if not verda.clusters.is_available('16B200', Locations.FIN_03):
34-
raise ValueError('Cluster type 16B200 is not available in FIN_03')
37+
if not verda.clusters.is_available(cluster_type, location_code):
38+
raise ValueError(f'Cluster type {cluster_type} is not available in {location_code}')
3539

3640
# Get available images for cluster type
37-
images = verda.clusters.get_cluster_images('16B200')
38-
if 'ubuntu-22.04-cuda-12.9-cluster' not in images:
39-
raise ValueError('Ubuntu 22.04 CUDA 12.9 cluster image is not supported for 16B200')
41+
images = verda.clusters.get_cluster_images(cluster_type)
42+
if cluster_image not in images:
43+
raise ValueError(f'Cluster image {cluster_image} is not supported for {cluster_type}')
4044

41-
# Create a 16B200 cluster
45+
# Create a cluster
4246
cluster = verda.clusters.create(
4347
hostname='my-compute-cluster',
44-
cluster_type='16B200',
45-
image='ubuntu-22.04-cuda-12.9-cluster',
48+
cluster_type=cluster_type,
49+
image=cluster_image,
4650
description='Example compute cluster for distributed training',
4751
ssh_key_ids=ssh_keys,
48-
location=Locations.FIN_03,
52+
location=location_code,
4953
shared_volume_name='my-shared-volume',
5054
shared_volume_size=30000,
5155
wait_for_status=None,
@@ -59,8 +63,8 @@ def create_cluster_example():
5963

6064
# Wait for cluster to enter RUNNING status
6165
while cluster.status != ClusterStatus.RUNNING:
62-
time.sleep(30)
6366
print(f'Waiting for cluster to enter RUNNING status... (status: {cluster.status})')
67+
time.sleep(3)
6468
cluster = verda.clusters.get_by_id(cluster.id)
6569

6670
print(f'Public IP: {cluster.ip}')
@@ -100,7 +104,11 @@ def get_cluster_by_id_example(cluster_id: str):
100104
print(f' Created at: {cluster.created_at}')
101105
print(f' Public IP: {cluster.ip}')
102106
print(f' Worker nodes: {len(cluster.worker_nodes)}')
103-
107+
for node in cluster.worker_nodes:
108+
print(f' - {node.hostname} ({node.id}): {node.status}, private IP: {node.private_ip}')
109+
print(f' Shared volumes: {len(cluster.shared_volumes)}')
110+
for volume in cluster.shared_volumes:
111+
print(f' - {volume.name} ({volume.id}): {volume.size_in_gigabytes} GB, mounted at {volume.mount_point}')
104112
return cluster
105113

106114

verda/clusters/_clusters.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from verda.constants import Actions, ClusterStatus, ErrorCodes, Locations
88
from verda.exceptions import APIException
9+
from verda.http_client import HTTPClient
910

1011
CLUSTERS_ENDPOINT = '/clusters'
1112

@@ -30,6 +31,20 @@ class ClusterWorkerNode:
3031
hostname: str
3132
private_ip: str
3233

34+
@dataclass_json
35+
@dataclass
36+
class SharedVolume:
37+
"""Represents a shared volume in a cluster.
38+

39+
Attributes:
40+
id: Unique identifier for the volume.
41+
name: Name of the volume.
size_in_gigabytes: Size of the volume in gigabytes.
mount_point: Mount point of the volume; defaults to None.
42+
"""
43+

44+
id: str
45+
name: str
46+
size_in_gigabytes: int
47+
mount_point: str | None = None
3348

3449
@dataclass_json
3550
@dataclass
@@ -59,7 +74,9 @@ class Cluster:
5974
location: str
6075
cluster_type: str
6176
worker_nodes: list[ClusterWorkerNode]
77+
shared_volumes: list[SharedVolume]
6278
ssh_key_ids: list[str]
79+
6380
image: str | None = None
6481
startup_script_id: str | None = None
6582
ip: str | None = None
@@ -71,7 +88,7 @@ class ClustersService:
7188
This service provides methods to create, retrieve, and manage compute clusters.
7289
"""
7390

74-
def __init__(self, http_client) -> None:
91+
def __init__(self, http_client: HTTPClient) -> None:
7592
"""Initializes the ClustersService with an HTTP client.
7693
7794
Args:

0 commit comments

Comments
 (0)