66- List all clusters
77- Get a specific cluster by ID
88- Get cluster nodes
9- - Scale a cluster
109- Delete a cluster
1110"""
1211
1312import os
13+ import time
1414
1515from verda import VerdaClient
1616from verda .constants import Actions , Locations
1717
1818# Get credentials from environment variables
1919CLIENT_ID = os .environ .get ('VERDA_CLIENT_ID' )
2020CLIENT_SECRET = os .environ .get ('VERDA_CLIENT_SECRET' )
21+ BASE_URL = os .environ .get ('VERDA_BASE_URL' , 'https://api.verda.com/v1' )
2122
2223# Create client
23- verda = VerdaClient (CLIENT_ID , CLIENT_SECRET )
24+ verda = VerdaClient (CLIENT_ID , CLIENT_SECRET , base_url = BASE_URL )
2425
2526
2627def create_cluster_example ():
@@ -34,19 +35,20 @@ def create_cluster_example():
3435
3536 # Get available images for cluster type
3637 images = verda .clusters .get_cluster_images ('16B200' )
37- if 'ubuntu-22.04-cuda-12.9 -cluster' not in images :
38+ if 'ubuntu-22.04-cuda-12.4 -cluster' not in images :
3839 raise ValueError ('Ubuntu 22.04 CUDA 12.4 cluster image is not supported for 16B200' )
3940
4041 # Create a 16B200 cluster
4142 cluster = verda .clusters .create (
4243 hostname = 'my-compute-cluster' ,
4344 cluster_type = '16B200' ,
44- image = 'ubuntu-22.04-cuda-12.9 -cluster' ,
45+ image = 'ubuntu-22.04-cuda-12.4 -cluster' ,
4546 description = 'Example compute cluster for distributed training' ,
4647 ssh_key_ids = ssh_keys ,
4748 location = Locations .FIN_03 ,
4849 shared_volume_name = 'my-shared-volume' ,
4950 shared_volume_size = 30000 ,
51+ wait_for_status = None ,
5052 )
5153
5254 print (f'Creating cluster: { cluster .id } ' )
@@ -55,6 +57,15 @@ def create_cluster_example():
5557 print (f'Cluster cluster_type: { cluster .cluster_type } ' )
5658 print (f'Location: { cluster .location } ' )
5759
60+ # Wait for cluster to enter RUNNING status
61+ while cluster .status != verda .constants .cluster_status .RUNNING :
62+ time .sleep (2 )
63+ print (f'Waiting for cluster to enter RUNNING status... (status: { cluster .status } )' )
64+ cluster = verda .clusters .get_by_id (cluster .id )
65+
66+ print (f'Public IP: { cluster .ip } ' )
67+ print ('Cluster is now running and ready to use!' )
68+
5869 return cluster
5970
6071
0 commit comments