11import itertools
22import time
33from dataclasses import dataclass
4- from typing import Literal
54
65from dataclasses_json import dataclass_json
76
8- from verda .constants import Actions , Locations
7+ from verda .constants import Actions , ClusterStatus , ErrorCodes , Locations
8+ from verda .exceptions import APIException
99
1010CLUSTERS_ENDPOINT = '/clusters'
1111
12+ # Default shared volume size is 30TB
13+ DEFAULT_SHARED_VOLUME_SIZE = 30000
14+
1215
1316@dataclass_json
1417@dataclass
1518class ClusterWorkerNode :
1619 """Represents a worker node in a cluster.
1720
1821 Attributes:
19- id: Unique identifier for the node (instance ID) .
22+ id: Unique identifier for the node.
2023 status: Current status of the node.
2124 hostname: Network hostname of the node.
2225 private_ip: Private IP address of the node.
@@ -40,7 +43,7 @@ class Cluster:
4043 status: Current operational status of the cluster.
4144 created_at: Timestamp of cluster creation.
4245 location: Datacenter location code (default: Locations.FIN_03).
43- cluster_type: Type of instances used for cluster nodes .
46+ cluster_type: Type of the cluster.
4447 worker_nodes: List of nodes in the cluster.
4548 ssh_key_ids: List of SSH key IDs associated with the cluster nodes.
4649 image: Image ID or type used for cluster nodes.
@@ -65,7 +68,7 @@ class Cluster:
6568class ClustersService :
6669 """Service for managing compute clusters through the API.
6770
68- This service provides methods to create, retrieve, scale, and manage compute clusters.
71+ This service provides methods to create, retrieve, and manage compute clusters.
6972 """
7073
7174 def __init__ (self , http_client ) -> None :
@@ -118,6 +121,7 @@ def create(
118121 shared_volume_name : str | None = None ,
119122 shared_volume_size : int | None = None ,
120123 * ,
124+ wait_for_status : str | None = ClusterStatus .PROVISIONING ,
121125 max_wait_time : float = 900 ,
122126 initial_interval : float = 1.0 ,
123127 max_interval : float = 10 ,
@@ -135,6 +139,7 @@ def create(
135139 startup_script_id: Optional ID of startup script to run on nodes.
136140 shared_volume_name: Optional name for the shared volume.
137141 shared_volume_size: Optional size for the shared volume, in GB, default to 30TB.
142+ wait_for_status: Status to wait for the cluster to reach, default to PROVISIONING. If None, no wait is performed.
138143 max_wait_time: Maximum total wait for the cluster to start creating, in seconds (default: 900)
139144 initial_interval: Initial interval, in seconds (default: 1.0)
140145 max_interval: The longest single delay allowed between retries, in seconds (default: 10)
@@ -158,19 +163,28 @@ def create(
158163 'startup_script_id' : startup_script_id ,
159164 'shared_volume' : {
160165 'name' : shared_volume_name if shared_volume_name else hostname + '-shared-volume' ,
161- 'size' : shared_volume_size if shared_volume_size else 30000 ,
166+ 'size' : shared_volume_size if shared_volume_size else DEFAULT_SHARED_VOLUME_SIZE ,
162167 },
163168 }
164169 response = self ._http_client .post (CLUSTERS_ENDPOINT , json = payload ).json ()
165170 id = response ['id' ]
166171
172+ if not wait_for_status :
173+ return self .get_by_id (id )
174+
167175 # Wait for cluster to enter creating state with timeout
168176 deadline = time .monotonic () + max_wait_time
169177 for i in itertools .count ():
170178 cluster = self .get_by_id (id )
171- if cluster .status != 'ordered' :
179+ if cluster .status == wait_for_status :
172180 return cluster
173181
182+ if cluster .status == ClusterStatus .ERROR :
183+ raise APIException (ErrorCodes .SERVER_ERROR , f'Cluster { id } entered error state' )
184+
185+ if cluster .status == ClusterStatus .DISCONTINUED :
186+ raise APIException (ErrorCodes .SERVER_ERROR , f'Cluster { id } was discontinued' )
187+
174188 now = time .monotonic ()
175189 if now >= deadline :
176190 raise TimeoutError (
@@ -181,10 +195,10 @@ def create(
181195 time .sleep (interval )
182196
183197 def action (self , id_list : list [str ] | str , action : str ) -> None :
184- """Performs an action on one or more instances .
198+ """Performs an action on one or more clusters .
185199
186200 Args:
187- id_list: Single instance ID or list of instance IDs to act upon.
201+ id_list: Single cluster ID or list of cluster IDs to act upon.
188202 action: Action to perform on the clusters. Only `delete` is supported.
189203
190204 Raises:
@@ -215,20 +229,21 @@ def is_available(
215229 cluster_type : str ,
216230 location_code : str | None = None ,
217231 ) -> bool :
218- """Checks if a specific instance type is available for deployment.
232+ """Checks if a specific cluster type is available for deployment.
219233
220234 Args:
221- cluster_type: Type of instance to check availability for.
235+ cluster_type: Type of cluster to check availability for.
222236 location_code: Optional datacenter location code.
223237
224238 Returns:
225- True if the instance type is available, False otherwise.
239+ True if the cluster type is available, False otherwise.
226240 """
227241 query_params = {'location_code' : location_code }
228242 url = f'/cluster-availability/{ cluster_type } '
229- return self ._http_client .get (url , query_params ).json ()
243+ response = self ._http_client .get (url , query_params ).text
244+ return response == 'true'
230245
231- def get_availabilities (self , location_code : str | None = None ) -> list [dict ]:
246+ def get_availabilities (self , location_code : str | None = None ) -> list [str ]:
232247 """Retrieves a list of available cluster types across locations.
233248
234249 Args:
@@ -238,4 +253,33 @@ def get_availabilities(self, location_code: str | None = None) -> list[dict]:
238253 List of available cluster types and their details.
239254 """
240255 query_params = {'location_code' : location_code }
241- return self ._http_client .get ('/cluster-availability' , params = query_params ).json ()
256+ response = self ._http_client .get ('/cluster-availability' , params = query_params ).json ()
257+ availabilities = response [0 ]['availabilities' ]
258+ return availabilities
259+
260+ def get_availability (self , cluster_type : str , location_code : str | None = None ) -> list [dict ]:
261+ """Checks if a specific cluster type is available for deployment.
262+
263+ Args:
264+ cluster_type: Type of cluster to check availability for.
265+ location_code: Optional datacenter location code.
266+
267+ Returns:
268+ True if the cluster type is available, False otherwise.
269+ """
270+
271+ def get_cluster_images (
272+ self ,
273+ cluster_type : str | None = None ,
274+ ) -> list [str ]:
275+ """Retrieves a list of available images for a given cluster type (optional).
276+
277+ Args:
278+ cluster_type: Type of cluster to get images for.
279+
280+ Returns:
281+ List of available images for the given cluster type.
282+ """
283+ query_params = {'instance_type' : cluster_type }
284+ images = self ._http_client .get ('/images/cluster' , params = query_params ).json ()
285+ return [image ['image_type' ] for image in images ]
0 commit comments