|
15 | 15 | UtilizationScalingTrigger |
16 | 16 | ) |
17 | 17 |
|
18 | | -# Configuration - replace with your deployment name |
19 | | -DEPLOYMENT_NAME = "my-deployment" |
20 | 18 |
|
21 | | -# Get client secret and id from environment variables |
| 19 | +# Get deployment name, client secret and id from environment variables |
| 20 | +DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME') |
22 | 21 | DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID') |
23 | 22 | DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET') |
24 | 23 |
|
25 | | - |
26 | | -def check_deployment_exists(client: DataCrunchClient, deployment_name: str) -> bool: |
27 | | - """Check if a deployment exists. |
28 | | -
|
29 | | - Args: |
30 | | - client: DataCrunch API client |
31 | | - deployment_name: Name of the deployment to check |
32 | | -
|
33 | | - Returns: |
34 | | - bool: True if deployment exists, False otherwise |
35 | | - """ |
36 | | - try: |
37 | | - client.containers.get_deployment_by_name(deployment_name) |
38 | | - return True |
39 | | - except APIException as e: |
40 | | - print(f"Error: {e}") |
41 | | - return False |
42 | | - |
43 | | - |
44 | | -def update_deployment_scaling(client: DataCrunchClient, deployment_name: str) -> None: |
45 | | - """Update scaling options using the dedicated scaling options API. |
46 | | -
|
47 | | - Args: |
48 | | - client: DataCrunch API client |
49 | | - deployment_name: Name of the deployment to update |
50 | | - """ |
51 | | - try: |
52 | | - # Create scaling options using ScalingOptions dataclass |
53 | | - scaling_options = ScalingOptions( |
54 | | - min_replica_count=1, |
55 | | - max_replica_count=5, |
56 | | - scale_down_policy=ScalingPolicy( |
57 | | - delay_seconds=600), # Longer cooldown period |
58 | | - scale_up_policy=ScalingPolicy(delay_seconds=60), # Quick scale-up |
59 | | - queue_message_ttl_seconds=500, |
60 | | - concurrent_requests_per_replica=1, |
61 | | - scaling_triggers=ScalingTriggers( |
62 | | - queue_load=QueueLoadScalingTrigger(threshold=1.0), |
63 | | - cpu_utilization=UtilizationScalingTrigger( |
64 | | - enabled=True, |
65 | | - threshold=75 |
66 | | - ), |
67 | | - gpu_utilization=UtilizationScalingTrigger( |
68 | | - enabled=False # Disable GPU utilization trigger |
69 | | - ) |
| 24 | +# Initialize client |
| 25 | +datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET) |
| 26 | + |
| 27 | +try: |
| 28 | + # Get current scaling options |
| 29 | + scaling_options = datacrunch.containers.get_deployment_scaling_options( |
| 30 | + DEPLOYMENT_NAME) |
| 31 | + |
| 32 | + print(f"Current scaling configuration:\n") |
| 33 | + print(f"Min replicas: {scaling_options.min_replica_count}") |
| 34 | + print(f"Max replicas: {scaling_options.max_replica_count}") |
| 35 | + print( |
| 36 | + f"Scale-up delay: {scaling_options.scale_up_policy.delay_seconds} seconds") |
| 37 | + print( |
| 38 | + f"Scale-down delay: {scaling_options.scale_down_policy.delay_seconds} seconds") |
| 39 | + print( |
| 40 | + f"Queue message TTL: {scaling_options.queue_message_ttl_seconds} seconds") |
| 41 | + print( |
| 42 | + f"Concurrent requests per replica: {scaling_options.concurrent_requests_per_replica}") |
| 43 | + print("Scaling Triggers:") |
| 44 | + print( |
| 45 | + f" Queue load threshold: {scaling_options.scaling_triggers.queue_load.threshold}") |
| 46 | + if scaling_options.scaling_triggers.cpu_utilization: |
| 47 | + print( |
| 48 | + f" CPU utilization enabled: {scaling_options.scaling_triggers.cpu_utilization.enabled}") |
| 49 | + print( |
| 50 | + f" CPU utilization threshold: {scaling_options.scaling_triggers.cpu_utilization.threshold}%") |
| 51 | + if scaling_options.scaling_triggers.gpu_utilization: |
| 52 | + print( |
| 53 | + f" GPU utilization enabled: {scaling_options.scaling_triggers.gpu_utilization.enabled}") |
| 54 | + if scaling_options.scaling_triggers.gpu_utilization.threshold: |
| 55 | + print( |
| 56 | + f" GPU utilization threshold: {scaling_options.scaling_triggers.gpu_utilization.threshold}%") |
| 57 | + |
| 58 | + # Create scaling options using ScalingOptions dataclass |
| 59 | + scaling_options = ScalingOptions( |
| 60 | + min_replica_count=1, |
| 61 | + max_replica_count=5, |
| 62 | + scale_down_policy=ScalingPolicy( |
| 63 | + delay_seconds=600), # Longer cooldown period |
| 64 | + scale_up_policy=ScalingPolicy(delay_seconds=0), # Quick scale-up |
| 65 | + queue_message_ttl_seconds=500, |
| 66 | + concurrent_requests_per_replica=50, # LLMs can handle concurrent requests |
| 67 | + scaling_triggers=ScalingTriggers( |
| 68 | + queue_load=QueueLoadScalingTrigger(threshold=1.0), |
| 69 | + cpu_utilization=UtilizationScalingTrigger( |
| 70 | + enabled=True, |
| 71 | + threshold=75 |
| 72 | + ), |
| 73 | + gpu_utilization=UtilizationScalingTrigger( |
| 74 | + enabled=False # Disable GPU utilization trigger |
70 | 75 | ) |
71 | 76 | ) |
72 | | - |
73 | | - # Update scaling options |
74 | | - updated_options = client.containers.update_deployment_scaling_options( |
75 | | - deployment_name, scaling_options) |
76 | | - print(f"Updated deployment scaling options") |
77 | | - print(f"New min replicas: {updated_options.min_replica_count}") |
78 | | - print(f"New max replicas: {updated_options.max_replica_count}") |
| 77 | + ) |
| 78 | + |
| 79 | + # Update scaling options |
| 80 | + updated_options = datacrunch.containers.update_deployment_scaling_options( |
| 81 | + DEPLOYMENT_NAME, scaling_options) |
| 82 | + |
| 83 | + print(f"\nUpdated scaling configuration:\n") |
| 84 | + print(f"Min replicas: {updated_options.min_replica_count}") |
| 85 | + print(f"Max replicas: {updated_options.max_replica_count}") |
| 86 | + print( |
| 87 | + f"Scale-up delay: {updated_options.scale_up_policy.delay_seconds} seconds") |
| 88 | + print( |
| 89 | + f"Scale-down delay: {updated_options.scale_down_policy.delay_seconds} seconds") |
| 90 | + print( |
| 91 | + f"Queue message TTL: {updated_options.queue_message_ttl_seconds} seconds") |
| 92 | + print( |
| 93 | + f"Concurrent requests per replica: {updated_options.concurrent_requests_per_replica}") |
| 94 | + print("Scaling Triggers:") |
| 95 | + print( |
| 96 | + f" Queue load threshold: {updated_options.scaling_triggers.queue_load.threshold}") |
| 97 | + if updated_options.scaling_triggers.cpu_utilization: |
79 | 98 | print( |
80 | | - f"CPU utilization trigger enabled: {updated_options.scaling_triggers.cpu_utilization.enabled}") |
| 99 | + f" CPU utilization enabled: {updated_options.scaling_triggers.cpu_utilization.enabled}") |
81 | 100 | print( |
82 | | - f"CPU utilization threshold: {updated_options.scaling_triggers.cpu_utilization.threshold}%") |
83 | | - except APIException as e: |
84 | | - print(f"Error updating scaling options: {e}") |
85 | | - |
86 | | - |
87 | | -def main() -> None: |
88 | | - """Main function demonstrating scaling updates.""" |
89 | | - try: |
90 | | - # Initialize client |
91 | | - datacrunch = DataCrunchClient( |
92 | | - DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET) |
93 | | - |
94 | | - # Verify deployment exists |
95 | | - if not check_deployment_exists(datacrunch, DEPLOYMENT_NAME): |
96 | | - print(f"Deployment {DEPLOYMENT_NAME} does not exist.") |
97 | | - return |
98 | | - |
99 | | - # Update scaling options using the API |
100 | | - update_deployment_scaling(datacrunch, DEPLOYMENT_NAME) |
101 | | - |
102 | | - # Get current scaling options |
103 | | - scaling_options = datacrunch.containers.get_deployment_scaling_options( |
104 | | - DEPLOYMENT_NAME) |
105 | | - print(f"\nCurrent scaling configuration:") |
106 | | - print(f"Min replicas: {scaling_options.min_replica_count}") |
107 | | - print(f"Max replicas: {scaling_options.max_replica_count}") |
108 | | - print( |
109 | | - f"Scale-up delay: {scaling_options.scale_up_policy.delay_seconds} seconds") |
| 101 | + f" CPU utilization threshold: {updated_options.scaling_triggers.cpu_utilization.threshold}%") |
| 102 | + if updated_options.scaling_triggers.gpu_utilization: |
110 | 103 | print( |
111 | | - f"Scale-down delay: {scaling_options.scale_down_policy.delay_seconds} seconds") |
112 | | - |
113 | | - print("\nScaling update completed successfully.") |
114 | | - |
115 | | - except Exception as e: |
116 | | - print(f"Unexpected error: {e}") |
| 104 | + f" GPU utilization enabled: {updated_options.scaling_triggers.gpu_utilization.enabled}") |
| 105 | + if updated_options.scaling_triggers.gpu_utilization.threshold: |
| 106 | + print( |
| 107 | + f" GPU utilization threshold: {updated_options.scaling_triggers.gpu_utilization.threshold}%") |
117 | 108 |
|
118 | 109 |
|
119 | | -if __name__ == "__main__": |
120 | | - main() |
| 110 | +except APIException as e: |
| 111 | + print(f"Error updating scaling options: {e}") |
| 112 | +except Exception as e: |
| 113 | + print(f"Unexpected error: {e}") |
0 commit comments