async inference

jaakkovarjo · jaakkovarjo · commit 86cec409c21b · 2025-04-11T16:11:24.000+03:00
diff --git a/.DS_Store b/.DS_Store
diff --git a/README.md b/README.md
@@ -24,26 +24,41 @@ DataCrunch's Public API documentation [is available here](https://api.datacrunch
 
 - Generate your client credentials - [instructions in the public API docs](https://api.datacrunch.io/v1/docs#description/quick-start-guide).
 
-- Add the client secret to an environment variable (don't want it to be hardcoded):
+
+- Add your client ID and client secret to environment variables (so they are not hardcoded):
 
   Linux (bash):
 
   ```bash
-  export DATACRUNCH_CLIENT_SECRET=Z4CZq02rdwdB7ISV0k4Z2gtwAFKiyvr2U1l0KDIeYi
+  export DATACRUNCH_CLIENT_ID=YOUR_ID_HERE
+  export DATACRUNCH_CLIENT_SECRET=YOUR_SECRET_HERE
   ```
 
+- To send inference requests from the SDK, you must generate an inference key - [Instructions on inference authorization](https://docs.datacrunch.io/inference/authorization)
+  
+
+- Add your inference key to an environment variable
+
+  Linux (bash):
+ 
+  ```bash
+  export DATACRUNCH_INFERENCE_KEY=YOUR_API_KEY_HERE
+  ```
+  
   Other platforms:
   https://en.wikipedia.org/wiki/Environment_variable
 
+
+
 - Example for creating a new instance:
 
   ```python
   import os
   from datacrunch import DataCrunchClient
 
-  # Get client secret from environment variable
+  # Get credentials from environment variables
+  CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
   CLIENT_SECRET = os.environ['DATACRUNCH_CLIENT_SECRET']
-  CLIENT_ID = 'Ibk5bdxV64lKAWOqYnvSi'
 
   # Create datcrunch client
   datacrunch = DataCrunchClient(CLIENT_ID, CLIENT_SECRET)
@@ -118,7 +133,7 @@ Create this file in the root directory of the project:
 from datacrunch.datacrunch import DataCrunchClient
 
 CLIENT_SECRET = 'secret'
-CLIENT_ID = 'Ibk5bdxV64lKAWOqYnvSi'
+CLIENT_ID = 'your-id'
 
 # Create datcrunch client
 datacrunch = DataCrunchClient(CLIENT_ID, CLIENT_SECRET, base_url='http://localhost:3001/v1')
diff --git a/datacrunch/InferenceClient/inference_client.py b/datacrunch/InferenceClient/inference_client.py
@@ -4,12 +4,17 @@
 from requests.structures import CaseInsensitiveDict
 from typing import Optional, Dict, Any, Union, Generator
 from urllib.parse import urlparse
-
+from enum import Enum
 
 class InferenceClientError(Exception):
     """Base exception for InferenceClient errors."""
     pass
 
+class AsyncStatus(int, Enum):
+    Initialized = 0
+    Queue = 1
+    Inference = 2
+    Completed = 3
 
 @dataclass_json(undefined=Undefined.EXCLUDE)
 @dataclass
@@ -236,16 +241,16 @@ def run(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 *
             self._make_request(
                 http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers)
             return
-        # Add the "Prefer: async-inference" header to the request, to run async and wait for the response
-        headers['Prefer'] = 'async-inference'
+        # Add the "Prefer: respond-async" header to the request, to run async and wait for the response
+        headers['Prefer'] = 'respond-async'
 
         response = self._make_request(
             http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers)
 
-        # TODO: this response format isn't final
-        execution_id = response.json()['id']
+        result = response.json()
+        execution_id = result['Id']
 
-        return AsyncInferenceExecution(self, execution_id)
+        return AsyncInferenceExecution(self, execution_id, AsyncStatus.Initialized)
 
     def get(self, path: str, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, timeout_seconds: Optional[int] = None) -> requests.Response:
         return self._make_request('GET', path, params=params, headers=headers, timeout_seconds=timeout_seconds)
@@ -292,11 +297,20 @@ def health(self, healthcheck_path: str = "/health") -> requests.Response:
 class AsyncInferenceExecution:
     _inference_client: 'InferenceClient'
     id: str
-    _status: str  # TODO: add a status enum?
+    _status: AsyncStatus
     INFERENCE_ID_HEADER = 'X-Inference-Id'
 
-    def status(self) -> Dict[str, Any]:
-        """Get the current status of the async inference execution.
+    def status(self) -> AsyncStatus:
+        """Get the last stored status of the async inference execution, as an AsyncStatus value only.
+
+        Returns:
+            AsyncStatus: The status object
+        """
+
+        return self._status
+
+    def status_json(self) -> Dict[str, Any]:
+        """Fetch the current status of the async inference execution and return the status JSON.
 
         Returns:
             Dict[str, Any]: The status response containing the execution status and other metadata
@@ -306,20 +320,24 @@ def status(self) -> Dict[str, Any]:
             url, headers=self._inference_client._build_request_headers({self.INFERENCE_ID_HEADER: self.id}))
 
         response_json = response.json()
-        self._status = response_json['status']
+        self._status = AsyncStatus(response_json['Status'])
 
         return response_json
 
-    def result(self) -> Dict[str, Any]:
+    def result(self) -> Dict[str, Any] | str:
         """Get the results of the async inference execution.
 
         Returns:
             Dict[str, Any]: The results of the inference execution
         """
-        url = f'{self._inference_client.base_domain}/results/{self._inference_client.deployment_name}'
+        url = f'{self._inference_client.base_domain}/result/{self._inference_client.deployment_name}'
         response = self._inference_client._session.get(
             url, headers=self._inference_client._build_request_headers({self.INFERENCE_ID_HEADER: self.id}))
 
-        return response
+        if response.headers['Content-Type'] == 'application/json':
+            return response.json()
+        else:
+            return response.text
+
     # alias for get_results
     output = result
diff --git a/examples/containers/calling_the_endpoint_asynchronously.py b/examples/containers/calling_the_endpoint_asynchronously.py
@@ -0,0 +1,43 @@
+import os
+from time import sleep
+from datacrunch import DataCrunchClient
+from datacrunch.InferenceClient.inference_client import AsyncStatus
+
+# Configuration - replace with your deployment name
+DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
+
+# Get client id, client secret and inference key from environment variables
+DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
+DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
+DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
+
+# DataCrunch client instance
+datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
+
+# Get the deployment
+deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
+
+# Make an asynchronous request to the endpoint.
+# This example demonstrates calling a SGLang deployment which serves LLMs using an OpenAI-compatible API format
+data = {
+    "model": "deepseek-ai/deepseek-llm-7b-chat",
+    "prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
+    "max_tokens": 128,
+    "temperature": 0.7,
+    "top_p": 0.9
+}
+
+header = {
+    "Content-Type": "application/json"
+}
+
+response = deployment.run(
+    data=data,
+    path='v1/completions',
+    headers=header,
+)
+
+while response.status() != AsyncStatus.Completed:
+    print(response.status_json())
+    sleep(1)
+print(response.output())
diff --git a/examples/containers/calling_the_endpoint_synchronously.py b/examples/containers/calling_the_endpoint_synchronously.py
@@ -2,15 +2,15 @@
 from datacrunch import DataCrunchClient
 
 # Configuration - replace with your deployment name
-DEPLOYMENT_NAME = "sglang-deployment-example"
+DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
 
 # Get client secret and id from environment variables
 DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
 DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
 DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
 
 # DataCrunch client instance
-datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET)
+datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
 
 # Get the deployment
 deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
@@ -30,4 +30,4 @@
 )  # wait for the response
 
 # Print the response
-print(response.body)
+print(response.output())
diff --git a/examples/containers/delete_deployment_example.py b/examples/containers/delete_deployment_example.py
@@ -0,0 +1,18 @@
+"""Example script demonstrating deleting a deployment using the DataCrunch API.
+"""
+
+import os
+from datacrunch import DataCrunchClient
+
+DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
+
+# Get confidential values from environment variables
+DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
+DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
+
+# Initialize the client (no inference key is needed to delete a deployment)
+datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET)
+
+# Delete the deployment
+datacrunch.containers.delete_deployment(DEPLOYMENT_NAME)
+print("Deployment deleted")
diff --git a/examples/containers/sglang_deployment_example.py b/examples/containers/sglang_deployment_example.py
@@ -44,7 +44,6 @@
 DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
 HF_TOKEN = os.environ.get('HF_TOKEN')
 
-
 def wait_for_deployment_health(datacrunch_client: DataCrunchClient, deployment_name: str, max_attempts: int = 20, delay: int = 30) -> bool:
     """Wait for deployment to reach healthy status.
 
@@ -99,18 +98,18 @@ def graceful_shutdown(signum, frame) -> None:
 
 try:
     # Get the inference API key
-    inference_key = DATACRUNCH_INFERENCE_KEY
-    if not inference_key:
-        inference_key = input(
+    datacrunch_inference_key = DATACRUNCH_INFERENCE_KEY
+    if not datacrunch_inference_key:
+        datacrunch_inference_key = input(
             "Enter your Inference API Key from the DataCrunch dashboard: ")
     else:
         print("Using Inference API Key from environment")
 
     # Initialize client with inference key
     datacrunch = DataCrunchClient(
-        DATACRUNCH_CLIENT_ID,
-        DATACRUNCH_CLIENT_SECRET,
-        inference_key=inference_key
+        client_id=DATACRUNCH_CLIENT_ID,
+        client_secret=DATACRUNCH_CLIENT_SECRET,
+        inference_key=datacrunch_inference_key
     )
 
     # Register signal handlers for cleanup