Add default value for Deployment.container_registry_settings; improve examples

tamirse · tamirse · commit 4dc5ddbaed6e · 2025-04-04T15:26:36.000+03:00
diff --git a/datacrunch/containers/containers.py b/datacrunch/containers/containers.py
@@ -4,7 +4,7 @@
 creation, updates, deletion, and monitoring of containerized applications.
 """
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from dataclasses_json import dataclass_json, Undefined  # type: ignore
 from typing import List, Optional, Dict, Any
 from enum import Enum
@@ -284,9 +284,10 @@ class Deployment:
     """
 
     name: str
-    container_registry_settings: ContainerRegistrySettings
     containers: List[Container]
     compute: ComputeResource
+    container_registry_settings: ContainerRegistrySettings = field(
+        default_factory=lambda: ContainerRegistrySettings(is_private=False))
     is_spot: bool = False
     endpoint_base_url: Optional[str] = None
     scaling: Optional[ScalingOptions] = None
diff --git a/examples/containers/sglang_deployment_example.py b/examples/containers/sglang_deployment_example.py
@@ -41,7 +41,7 @@
 # Get confidential values from environment variables
 DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
 DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
-INFERENCE_KEY = os.environ.get('INFERENCE_KEY')
+DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
 HF_TOKEN = os.environ.get('HF_TOKEN')
 
 
@@ -99,7 +99,7 @@ def graceful_shutdown(signum, frame) -> None:
 
 try:
     # Get the inference API key
-    inference_key = INFERENCE_KEY
+    inference_key = DATACRUNCH_INFERENCE_KEY
     if not inference_key:
         inference_key = input(
             "Enter your Inference API Key from the DataCrunch dashboard: ")
@@ -188,15 +188,12 @@ def graceful_shutdown(signum, frame) -> None:
         )
     )
 
-    # Create registry and compute settings
-    registry_settings = ContainerRegistrySettings(is_private=False)
-    # For a 7B model, General Compute (24GB VRAM) is sufficient
+    # Set compute settings. For a 7B model, General Compute (24GB VRAM) is sufficient
     compute = ComputeResource(name="General Compute", size=1)
 
-    # Create deployment object
+    # Create deployment object (container_registry_settings is omitted: it defaults to a public registry)
     deployment = Deployment(
         name=DEPLOYMENT_NAME,
-        container_registry_settings=registry_settings,
         containers=[container],
         compute=compute,
         scaling=scaling_options,
@@ -207,7 +204,7 @@ def graceful_shutdown(signum, frame) -> None:
     created_deployment = datacrunch.containers.create_deployment(
         deployment)
     print(f"Created deployment: {created_deployment.name}")
-    print("This will take several minutes while the model is downloaded and the server starts...")
+    print("This could take several minutes while the model is downloaded and the server starts...")
 
     # Wait for deployment to be healthy
     if not wait_for_deployment_health(datacrunch, DEPLOYMENT_NAME):
diff --git a/examples/containers/update_deployment_scaling_example.py b/examples/containers/update_deployment_scaling_example.py
@@ -15,106 +15,99 @@
     UtilizationScalingTrigger
 )
 
-# Configuration - replace with your deployment name
-DEPLOYMENT_NAME = "my-deployment"
 
-# Get client secret and id from environment variables
+# Get deployment name, client ID and client secret from environment variables
+DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
 DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
 DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
 
-
-def check_deployment_exists(client: DataCrunchClient, deployment_name: str) -> bool:
-    """Check if a deployment exists.
-
-    Args:
-        client: DataCrunch API client
-        deployment_name: Name of the deployment to check
-
-    Returns:
-        bool: True if deployment exists, False otherwise
-    """
-    try:
-        client.containers.get_deployment_by_name(deployment_name)
-        return True
-    except APIException as e:
-        print(f"Error: {e}")
-        return False
-
-
-def update_deployment_scaling(client: DataCrunchClient, deployment_name: str) -> None:
-    """Update scaling options using the dedicated scaling options API.
-
-    Args:
-        client: DataCrunch API client
-        deployment_name: Name of the deployment to update
-    """
-    try:
-        # Create scaling options using ScalingOptions dataclass
-        scaling_options = ScalingOptions(
-            min_replica_count=1,
-            max_replica_count=5,
-            scale_down_policy=ScalingPolicy(
-                delay_seconds=600),  # Longer cooldown period
-            scale_up_policy=ScalingPolicy(delay_seconds=60),  # Quick scale-up
-            queue_message_ttl_seconds=500,
-            concurrent_requests_per_replica=1,
-            scaling_triggers=ScalingTriggers(
-                queue_load=QueueLoadScalingTrigger(threshold=1.0),
-                cpu_utilization=UtilizationScalingTrigger(
-                    enabled=True,
-                    threshold=75
-                ),
-                gpu_utilization=UtilizationScalingTrigger(
-                    enabled=False  # Disable GPU utilization trigger
-                )
+# Initialize client
+datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET)
+
+try:
+    # Get current scaling options
+    scaling_options = datacrunch.containers.get_deployment_scaling_options(
+        DEPLOYMENT_NAME)
+
+    print(f"Current scaling configuration:\n")
+    print(f"Min replicas: {scaling_options.min_replica_count}")
+    print(f"Max replicas: {scaling_options.max_replica_count}")
+    print(
+        f"Scale-up delay: {scaling_options.scale_up_policy.delay_seconds} seconds")
+    print(
+        f"Scale-down delay: {scaling_options.scale_down_policy.delay_seconds} seconds")
+    print(
+        f"Queue message TTL: {scaling_options.queue_message_ttl_seconds} seconds")
+    print(
+        f"Concurrent requests per replica: {scaling_options.concurrent_requests_per_replica}")
+    print("Scaling Triggers:")
+    print(
+        f"  Queue load threshold: {scaling_options.scaling_triggers.queue_load.threshold}")
+    if scaling_options.scaling_triggers.cpu_utilization:
+        print(
+            f"  CPU utilization enabled: {scaling_options.scaling_triggers.cpu_utilization.enabled}")
+        print(
+            f"  CPU utilization threshold: {scaling_options.scaling_triggers.cpu_utilization.threshold}%")
+    if scaling_options.scaling_triggers.gpu_utilization:
+        print(
+            f"  GPU utilization enabled: {scaling_options.scaling_triggers.gpu_utilization.enabled}")
+        if scaling_options.scaling_triggers.gpu_utilization.threshold:
+            print(
+                f"  GPU utilization threshold: {scaling_options.scaling_triggers.gpu_utilization.threshold}%")
+
+    # Create scaling options using ScalingOptions dataclass
+    scaling_options = ScalingOptions(
+        min_replica_count=1,
+        max_replica_count=5,
+        scale_down_policy=ScalingPolicy(
+            delay_seconds=600),  # Longer cooldown period
+        scale_up_policy=ScalingPolicy(delay_seconds=0),  # Quick scale-up
+        queue_message_ttl_seconds=500,
+        concurrent_requests_per_replica=50,  # LLMs can handle concurrent requests
+        scaling_triggers=ScalingTriggers(
+            queue_load=QueueLoadScalingTrigger(threshold=1.0),
+            cpu_utilization=UtilizationScalingTrigger(
+                enabled=True,
+                threshold=75
+            ),
+            gpu_utilization=UtilizationScalingTrigger(
+                enabled=False  # Disable GPU utilization trigger
             )
         )
-
-        # Update scaling options
-        updated_options = client.containers.update_deployment_scaling_options(
-            deployment_name, scaling_options)
-        print(f"Updated deployment scaling options")
-        print(f"New min replicas: {updated_options.min_replica_count}")
-        print(f"New max replicas: {updated_options.max_replica_count}")
+    )
+
+    # Update scaling options
+    updated_options = datacrunch.containers.update_deployment_scaling_options(
+        DEPLOYMENT_NAME, scaling_options)
+
+    print(f"\nUpdated scaling configuration:\n")
+    print(f"Min replicas: {updated_options.min_replica_count}")
+    print(f"Max replicas: {updated_options.max_replica_count}")
+    print(
+        f"Scale-up delay: {updated_options.scale_up_policy.delay_seconds} seconds")
+    print(
+        f"Scale-down delay: {updated_options.scale_down_policy.delay_seconds} seconds")
+    print(
+        f"Queue message TTL: {updated_options.queue_message_ttl_seconds} seconds")
+    print(
+        f"Concurrent requests per replica: {updated_options.concurrent_requests_per_replica}")
+    print("Scaling Triggers:")
+    print(
+        f"  Queue load threshold: {updated_options.scaling_triggers.queue_load.threshold}")
+    if updated_options.scaling_triggers.cpu_utilization:
         print(
-            f"CPU utilization trigger enabled: {updated_options.scaling_triggers.cpu_utilization.enabled}")
+            f"  CPU utilization enabled: {updated_options.scaling_triggers.cpu_utilization.enabled}")
         print(
-            f"CPU utilization threshold: {updated_options.scaling_triggers.cpu_utilization.threshold}%")
-    except APIException as e:
-        print(f"Error updating scaling options: {e}")
-
-
-def main() -> None:
-    """Main function demonstrating scaling updates."""
-    try:
-        # Initialize client
-        datacrunch = DataCrunchClient(
-            DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET)
-
-        # Verify deployment exists
-        if not check_deployment_exists(datacrunch, DEPLOYMENT_NAME):
-            print(f"Deployment {DEPLOYMENT_NAME} does not exist.")
-            return
-
-        # Update scaling options using the API
-        update_deployment_scaling(datacrunch, DEPLOYMENT_NAME)
-
-        # Get current scaling options
-        scaling_options = datacrunch.containers.get_deployment_scaling_options(
-            DEPLOYMENT_NAME)
-        print(f"\nCurrent scaling configuration:")
-        print(f"Min replicas: {scaling_options.min_replica_count}")
-        print(f"Max replicas: {scaling_options.max_replica_count}")
-        print(
-            f"Scale-up delay: {scaling_options.scale_up_policy.delay_seconds} seconds")
+            f"  CPU utilization threshold: {updated_options.scaling_triggers.cpu_utilization.threshold}%")
+    if updated_options.scaling_triggers.gpu_utilization:
         print(
-            f"Scale-down delay: {scaling_options.scale_down_policy.delay_seconds} seconds")
-
-        print("\nScaling update completed successfully.")
-
-    except Exception as e:
-        print(f"Unexpected error: {e}")
+            f"  GPU utilization enabled: {updated_options.scaling_triggers.gpu_utilization.enabled}")
+        if updated_options.scaling_triggers.gpu_utilization.threshold:
+            print(
+                f"  GPU utilization threshold: {updated_options.scaling_triggers.gpu_utilization.threshold}%")
 
 
-if __name__ == "__main__":
-    main()
+except APIException as e:
+    print(f"Error updating scaling options: {e}")
+except Exception as e:
+    print(f"Unexpected error: {e}")