diff --git a/CHANGELOG.md b/CHANGELOG.md index ce30412..3808896 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Added `max_wait_time`, `initial_interval`, `max_interval`, `backoff_coefficient` keyword arguments to `instances.create()` + +### Changed + +- Cap `instances.create()` retry interval to 5 seconds; add exponential backoff; increase default `max_wait_time` from 60 to 180 seconds + ## [1.14.0] - 2025-08-15 ### Added diff --git a/datacrunch/instances/instances.py b/datacrunch/instances/instances.py index ce67c43..1cb8c2c 100644 --- a/datacrunch/instances/instances.py +++ b/datacrunch/instances/instances.py @@ -1,4 +1,5 @@ import time +import itertools from typing import List, Union, Optional, Dict, Literal from dataclasses import dataclass from dataclasses_json import dataclass_json @@ -123,7 +124,12 @@ def create(self, is_spot: bool = False, contract: Optional[Contract] = None, pricing: Optional[Pricing] = None, - coupon: Optional[str] = None) -> Instance: + coupon: Optional[str] = None, + *, + max_wait_time: float = 180, + initial_interval: float = 0.5, + max_interval: float = 5, + backoff_coefficient: float = 2.0) -> Instance: """Creates and deploys a new cloud instance. Args: @@ -141,6 +147,10 @@ def create(self, contract: Optional contract type for the instance. pricing: Optional pricing model for the instance. coupon: Optional coupon code for discounts. + max_wait_time: Maximum total wait for the instance to start provisioning, in seconds (default: 180) + initial_interval: Initial interval, in seconds (default: 0.5) + max_interval: The longest single delay allowed between retries, in seconds (default: 5) + backoff_coefficient: Coefficient to calculate the next retry interval (default 2.0) Returns: The newly created instance object. @@ -169,20 +179,19 @@ def create(self, id = self._http_client.post(INSTANCES_ENDPOINT, json=payload).text # Wait for instance to enter provisioning state with timeout - MAX_WAIT_TIME = 60 # Maximum wait time in seconds - POLL_INTERVAL = 0.5 # Time between status checks - - start_time = time.time() - while True: + deadline = time.monotonic() + max_wait_time + for i in itertools.count(): instance = self.get_by_id(id) if instance.status != InstanceStatus.ORDERED: return instance - if time.time() - start_time > MAX_WAIT_TIME: + now = time.monotonic() + if now >= deadline: raise TimeoutError( - f"Instance {id} did not enter provisioning state within {MAX_WAIT_TIME} seconds") + f"Instance {id} did not enter provisioning state within {max_wait_time:.1f} seconds") - time.sleep(POLL_INTERVAL) + interval = min(initial_interval * backoff_coefficient ** i, max_interval, deadline - now) + time.sleep(interval) def action(self, id_list: Union[List[str], str], action: str, volume_ids: Optional[List[str]] = None) -> None: """Performs an action on one or more instances.