Skip to content

Commit 4dc5ddb

Browse files
committed
Added a default value for the container registry settings and improved the example scripts
1 parent 0d708b6 commit 4dc5ddb

File tree

3 files changed

+93
-102
lines changed

3 files changed

+93
-102
lines changed

datacrunch/containers/containers.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
creation, updates, deletion, and monitoring of containerized applications.
55
"""
66

7-
from dataclasses import dataclass
7+
from dataclasses import dataclass, field
88
from dataclasses_json import dataclass_json, Undefined # type: ignore
99
from typing import List, Optional, Dict, Any
1010
from enum import Enum
@@ -284,9 +284,10 @@ class Deployment:
284284
"""
285285

286286
name: str
287-
container_registry_settings: ContainerRegistrySettings
288287
containers: List[Container]
289288
compute: ComputeResource
289+
container_registry_settings: ContainerRegistrySettings = field(
290+
default_factory=lambda: ContainerRegistrySettings(is_private=False))
290291
is_spot: bool = False
291292
endpoint_base_url: Optional[str] = None
292293
scaling: Optional[ScalingOptions] = None

examples/containers/sglang_deployment_example.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
# Get confidential values from environment variables
4242
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
4343
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
44-
INFERENCE_KEY = os.environ.get('INFERENCE_KEY')
44+
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
4545
HF_TOKEN = os.environ.get('HF_TOKEN')
4646

4747

@@ -99,7 +99,7 @@ def graceful_shutdown(signum, frame) -> None:
9999

100100
try:
101101
# Get the inference API key
102-
inference_key = INFERENCE_KEY
102+
inference_key = DATACRUNCH_INFERENCE_KEY
103103
if not inference_key:
104104
inference_key = input(
105105
"Enter your Inference API Key from the DataCrunch dashboard: ")
@@ -188,15 +188,12 @@ def graceful_shutdown(signum, frame) -> None:
188188
)
189189
)
190190

191-
# Create registry and compute settings
192-
registry_settings = ContainerRegistrySettings(is_private=False)
193-
# For a 7B model, General Compute (24GB VRAM) is sufficient
191+
# Set compute settings. For a 7B model, General Compute (24GB VRAM) is sufficient
194192
compute = ComputeResource(name="General Compute", size=1)
195193

196-
# Create deployment object
194+
# Create deployment object (no need to provide container_registry_settings because it's public)
197195
deployment = Deployment(
198196
name=DEPLOYMENT_NAME,
199-
container_registry_settings=registry_settings,
200197
containers=[container],
201198
compute=compute,
202199
scaling=scaling_options,
@@ -207,7 +204,7 @@ def graceful_shutdown(signum, frame) -> None:
207204
created_deployment = datacrunch.containers.create_deployment(
208205
deployment)
209206
print(f"Created deployment: {created_deployment.name}")
210-
print("This will take several minutes while the model is downloaded and the server starts...")
207+
print("This could take several minutes while the model is downloaded and the server starts...")
211208

212209
# Wait for deployment to be healthy
213210
if not wait_for_deployment_health(datacrunch, DEPLOYMENT_NAME):

examples/containers/update_deployment_scaling_example.py

Lines changed: 85 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -15,106 +15,99 @@
1515
UtilizationScalingTrigger
1616
)
1717

18-
# Configuration - replace with your deployment name
19-
DEPLOYMENT_NAME = "my-deployment"
2018

21-
# Get client secret and id from environment variables
19+
# Get deployment name, client secret and id from environment variables
20+
DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
2221
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
2322
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
2423

25-
26-
def check_deployment_exists(client: DataCrunchClient, deployment_name: str) -> bool:
27-
"""Check if a deployment exists.
28-
29-
Args:
30-
client: DataCrunch API client
31-
deployment_name: Name of the deployment to check
32-
33-
Returns:
34-
bool: True if deployment exists, False otherwise
35-
"""
36-
try:
37-
client.containers.get_deployment_by_name(deployment_name)
38-
return True
39-
except APIException as e:
40-
print(f"Error: {e}")
41-
return False
42-
43-
44-
def update_deployment_scaling(client: DataCrunchClient, deployment_name: str) -> None:
45-
"""Update scaling options using the dedicated scaling options API.
46-
47-
Args:
48-
client: DataCrunch API client
49-
deployment_name: Name of the deployment to update
50-
"""
51-
try:
52-
# Create scaling options using ScalingOptions dataclass
53-
scaling_options = ScalingOptions(
54-
min_replica_count=1,
55-
max_replica_count=5,
56-
scale_down_policy=ScalingPolicy(
57-
delay_seconds=600), # Longer cooldown period
58-
scale_up_policy=ScalingPolicy(delay_seconds=60), # Quick scale-up
59-
queue_message_ttl_seconds=500,
60-
concurrent_requests_per_replica=1,
61-
scaling_triggers=ScalingTriggers(
62-
queue_load=QueueLoadScalingTrigger(threshold=1.0),
63-
cpu_utilization=UtilizationScalingTrigger(
64-
enabled=True,
65-
threshold=75
66-
),
67-
gpu_utilization=UtilizationScalingTrigger(
68-
enabled=False # Disable GPU utilization trigger
69-
)
24+
# Initialize client
25+
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET)
26+
27+
try:
28+
# Get current scaling options
29+
scaling_options = datacrunch.containers.get_deployment_scaling_options(
30+
DEPLOYMENT_NAME)
31+
32+
print(f"Current scaling configuration:\n")
33+
print(f"Min replicas: {scaling_options.min_replica_count}")
34+
print(f"Max replicas: {scaling_options.max_replica_count}")
35+
print(
36+
f"Scale-up delay: {scaling_options.scale_up_policy.delay_seconds} seconds")
37+
print(
38+
f"Scale-down delay: {scaling_options.scale_down_policy.delay_seconds} seconds")
39+
print(
40+
f"Queue message TTL: {scaling_options.queue_message_ttl_seconds} seconds")
41+
print(
42+
f"Concurrent requests per replica: {scaling_options.concurrent_requests_per_replica}")
43+
print("Scaling Triggers:")
44+
print(
45+
f" Queue load threshold: {scaling_options.scaling_triggers.queue_load.threshold}")
46+
if scaling_options.scaling_triggers.cpu_utilization:
47+
print(
48+
f" CPU utilization enabled: {scaling_options.scaling_triggers.cpu_utilization.enabled}")
49+
print(
50+
f" CPU utilization threshold: {scaling_options.scaling_triggers.cpu_utilization.threshold}%")
51+
if scaling_options.scaling_triggers.gpu_utilization:
52+
print(
53+
f" GPU utilization enabled: {scaling_options.scaling_triggers.gpu_utilization.enabled}")
54+
if scaling_options.scaling_triggers.gpu_utilization.threshold:
55+
print(
56+
f" GPU utilization threshold: {scaling_options.scaling_triggers.gpu_utilization.threshold}%")
57+
58+
# Create scaling options using ScalingOptions dataclass
59+
scaling_options = ScalingOptions(
60+
min_replica_count=1,
61+
max_replica_count=5,
62+
scale_down_policy=ScalingPolicy(
63+
delay_seconds=600), # Longer cooldown period
64+
scale_up_policy=ScalingPolicy(delay_seconds=0), # Quick scale-up
65+
queue_message_ttl_seconds=500,
66+
concurrent_requests_per_replica=50, # LLMs can handle concurrent requests
67+
scaling_triggers=ScalingTriggers(
68+
queue_load=QueueLoadScalingTrigger(threshold=1.0),
69+
cpu_utilization=UtilizationScalingTrigger(
70+
enabled=True,
71+
threshold=75
72+
),
73+
gpu_utilization=UtilizationScalingTrigger(
74+
enabled=False # Disable GPU utilization trigger
7075
)
7176
)
72-
73-
# Update scaling options
74-
updated_options = client.containers.update_deployment_scaling_options(
75-
deployment_name, scaling_options)
76-
print(f"Updated deployment scaling options")
77-
print(f"New min replicas: {updated_options.min_replica_count}")
78-
print(f"New max replicas: {updated_options.max_replica_count}")
77+
)
78+
79+
# Update scaling options
80+
updated_options = datacrunch.containers.update_deployment_scaling_options(
81+
DEPLOYMENT_NAME, scaling_options)
82+
83+
print(f"\nUpdated scaling configuration:\n")
84+
print(f"Min replicas: {updated_options.min_replica_count}")
85+
print(f"Max replicas: {updated_options.max_replica_count}")
86+
print(
87+
f"Scale-up delay: {updated_options.scale_up_policy.delay_seconds} seconds")
88+
print(
89+
f"Scale-down delay: {updated_options.scale_down_policy.delay_seconds} seconds")
90+
print(
91+
f"Queue message TTL: {updated_options.queue_message_ttl_seconds} seconds")
92+
print(
93+
f"Concurrent requests per replica: {updated_options.concurrent_requests_per_replica}")
94+
print("Scaling Triggers:")
95+
print(
96+
f" Queue load threshold: {updated_options.scaling_triggers.queue_load.threshold}")
97+
if updated_options.scaling_triggers.cpu_utilization:
7998
print(
80-
f"CPU utilization trigger enabled: {updated_options.scaling_triggers.cpu_utilization.enabled}")
99+
f" CPU utilization enabled: {updated_options.scaling_triggers.cpu_utilization.enabled}")
81100
print(
82-
f"CPU utilization threshold: {updated_options.scaling_triggers.cpu_utilization.threshold}%")
83-
except APIException as e:
84-
print(f"Error updating scaling options: {e}")
85-
86-
87-
def main() -> None:
88-
"""Main function demonstrating scaling updates."""
89-
try:
90-
# Initialize client
91-
datacrunch = DataCrunchClient(
92-
DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET)
93-
94-
# Verify deployment exists
95-
if not check_deployment_exists(datacrunch, DEPLOYMENT_NAME):
96-
print(f"Deployment {DEPLOYMENT_NAME} does not exist.")
97-
return
98-
99-
# Update scaling options using the API
100-
update_deployment_scaling(datacrunch, DEPLOYMENT_NAME)
101-
102-
# Get current scaling options
103-
scaling_options = datacrunch.containers.get_deployment_scaling_options(
104-
DEPLOYMENT_NAME)
105-
print(f"\nCurrent scaling configuration:")
106-
print(f"Min replicas: {scaling_options.min_replica_count}")
107-
print(f"Max replicas: {scaling_options.max_replica_count}")
108-
print(
109-
f"Scale-up delay: {scaling_options.scale_up_policy.delay_seconds} seconds")
101+
f" CPU utilization threshold: {updated_options.scaling_triggers.cpu_utilization.threshold}%")
102+
if updated_options.scaling_triggers.gpu_utilization:
110103
print(
111-
f"Scale-down delay: {scaling_options.scale_down_policy.delay_seconds} seconds")
112-
113-
print("\nScaling update completed successfully.")
114-
115-
except Exception as e:
116-
print(f"Unexpected error: {e}")
104+
f" GPU utilization enabled: {updated_options.scaling_triggers.gpu_utilization.enabled}")
105+
if updated_options.scaling_triggers.gpu_utilization.threshold:
106+
print(
107+
f" GPU utilization threshold: {updated_options.scaling_triggers.gpu_utilization.threshold}%")
117108

118109

119-
if __name__ == "__main__":
120-
main()
110+
except APIException as e:
111+
print(f"Error updating scaling options: {e}")
112+
except Exception as e:
113+
print(f"Unexpected error: {e}")

0 commit comments

Comments
 (0)