Skip to content

Commit 86cec40

Browse files
committed
async inference
1 parent bf3d18f commit 86cec40

File tree

7 files changed

+121
-28
lines changed

7 files changed

+121
-28
lines changed

.DS_Store

6 KB
Binary file not shown.

README.md

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,41 @@ DataCrunch's Public API documentation [is available here](https://api.datacrunch
2424

2525
- Generate your client credentials - [instructions in the public API docs](https://api.datacrunch.io/v1/docs#description/quick-start-guide).
2626

27-
- Add the client secret to an environment variable (don't want it to be hardcoded):
27+
28+
- Add your client id and client secret to an environment variable (don't want it to be hardcoded):
2829

2930
Linux (bash):
3031

3132
```bash
32-
export DATACRUNCH_CLIENT_SECRET=Z4CZq02rdwdB7ISV0k4Z2gtwAFKiyvr2U1l0KDIeYi
33+
export DATACRUNCH_CLIENT_ID=YOUR_ID_HERE
34+
export DATACRUNCH_CLIENT_SECRET=YOUR_SECRET_HERE
3335
```
3436

37+
- To enable sending inference requests from SDK you must generate an inference key - [Instructions on inference authorization](https://docs.datacrunch.io/inference/authorization)
38+
39+
40+
- Add your inference key to an environment variable
41+
42+
Linux (bash):
43+
44+
```bash
45+
export DATACRUNCH_INFERENCE_KEY=YOUR_API_KEY_HERE
46+
```
47+
3548
Other platforms:
3649
https://en.wikipedia.org/wiki/Environment_variable
3750

51+
52+
3853
- Example for creating a new instance:
3954

4055
```python
4156
import os
4257
from datacrunch import DataCrunchClient
4358

44-
# Get client secret from environment variable
59+
# Get credentials from environment variables
60+
CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
4561
CLIENT_SECRET = os.environ['DATACRUNCH_CLIENT_SECRET']
46-
CLIENT_ID = 'Ibk5bdxV64lKAWOqYnvSi'
4762

4863
# Create datcrunch client
4964
datacrunch = DataCrunchClient(CLIENT_ID, CLIENT_SECRET)
@@ -118,7 +133,7 @@ Create this file in the root directory of the project:
118133
from datacrunch.datacrunch import DataCrunchClient
119134

120135
CLIENT_SECRET = 'secret'
121-
CLIENT_ID = 'Ibk5bdxV64lKAWOqYnvSi'
136+
CLIENT_ID = 'your-id'
122137

123138
# Create datcrunch client
124139
datacrunch = DataCrunchClient(CLIENT_ID, CLIENT_SECRET, base_url='http://localhost:3001/v1')

datacrunch/InferenceClient/inference_client.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,17 @@
44
from requests.structures import CaseInsensitiveDict
55
from typing import Optional, Dict, Any, Union, Generator
66
from urllib.parse import urlparse
7-
7+
from enum import Enum
88

99
class InferenceClientError(Exception):
1010
"""Base exception for InferenceClient errors."""
1111
pass
1212

13+
class AsyncStatus(int, Enum):
14+
Initialized = 0
15+
Queue = 1
16+
Inference = 2
17+
Completed = 3
1318

1419
@dataclass_json(undefined=Undefined.EXCLUDE)
1520
@dataclass
@@ -236,16 +241,16 @@ def run(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 *
236241
self._make_request(
237242
http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers)
238243
return
239-
# Add the "Prefer: async-inference" header to the request, to run async and wait for the response
240-
headers['Prefer'] = 'async-inference'
244+
# Add the "Prefer: respond-async" header to the request, to run async and wait for the response
245+
headers['Prefer'] = 'respond-async'
241246

242247
response = self._make_request(
243248
http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers)
244249

245-
# TODO: this response format isn't final
246-
execution_id = response.json()['id']
250+
result = response.json()
251+
execution_id = result['Id']
247252

248-
return AsyncInferenceExecution(self, execution_id)
253+
return AsyncInferenceExecution(self, execution_id, AsyncStatus.Initialized)
249254

250255
def get(self, path: str, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, timeout_seconds: Optional[int] = None) -> requests.Response:
251256
return self._make_request('GET', path, params=params, headers=headers, timeout_seconds=timeout_seconds)
@@ -292,11 +297,20 @@ def health(self, healthcheck_path: str = "/health") -> requests.Response:
292297
class AsyncInferenceExecution:
293298
_inference_client: 'InferenceClient'
294299
id: str
295-
_status: str # TODO: add a status enum?
300+
_status: AsyncStatus
296301
INFERENCE_ID_HEADER = 'X-Inference-Id'
297302

298-
def status(self) -> Dict[str, Any]:
299-
"""Get the current status of the async inference execution.
303+
def status(self) -> AsyncStatus:
304+
"""Get the current stored status of the async inference execution. Only the status value type
305+
306+
Returns:
307+
AsyncStatus: The status object
308+
"""
309+
310+
return self._status
311+
312+
def status_json(self) -> Dict[str, Any]:
313+
"""Get the current status of the async inference execution. Return the status json
300314
301315
Returns:
302316
Dict[str, Any]: The status response containing the execution status and other metadata
@@ -306,20 +320,24 @@ def status(self) -> Dict[str, Any]:
306320
url, headers=self._inference_client._build_request_headers({self.INFERENCE_ID_HEADER: self.id}))
307321

308322
response_json = response.json()
309-
self._status = response_json['status']
323+
self._status = AsyncStatus(response_json['Status'])
310324

311325
return response_json
312326

313-
def result(self) -> Dict[str, Any]:
327+
def result(self) -> Dict[str, Any] | str:
314328
"""Get the results of the async inference execution.
315329
316330
Returns:
317331
Dict[str, Any]: The results of the inference execution
318332
"""
319-
url = f'{self._inference_client.base_domain}/results/{self._inference_client.deployment_name}'
333+
url = f'{self._inference_client.base_domain}/result/{self._inference_client.deployment_name}'
320334
response = self._inference_client._session.get(
321335
url, headers=self._inference_client._build_request_headers({self.INFERENCE_ID_HEADER: self.id}))
322336

323-
return response
337+
if response.headers['Content-Type'] == 'application/json':
338+
return response.json()
339+
else:
340+
return response.text
341+
324342
# alias for get_results
325343
output = result
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import os
2+
from time import sleep
3+
from datacrunch import DataCrunchClient
4+
from datacrunch.InferenceClient.inference_client import AsyncStatus
5+
6+
# Configuration - replace with your deployment name
7+
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
8+
9+
# Get client secret and id from environment variables
10+
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
11+
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
12+
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
13+
14+
# DataCrunch client instance
15+
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
16+
17+
# Get the deployment
18+
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
19+
20+
# Make an asynchronous request to the endpoint.
21+
# This example demonstrates calling a SGLang deployment which serves LLMs using an OpenAI-compatible API format
22+
data = {
23+
"model": "deepseek-ai/deepseek-llm-7b-chat",
24+
"prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
25+
"max_tokens": 128,
26+
"temperature": 0.7,
27+
"top_p": 0.9
28+
}
29+
30+
header = {
31+
"Content-Type": "application/json"
32+
}
33+
34+
response = deployment.run(
35+
data=data,
36+
path='v1/completions',
37+
headers=header,
38+
)
39+
40+
while response.status() != AsyncStatus.Completed:
41+
print(response.status_json())
42+
sleep(1)
43+
print(response.output())

examples/containers/calling_the_endpoint_synchronously.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22
from datacrunch import DataCrunchClient
33

44
# Configuration - replace with your deployment name
5-
DEPLOYMENT_NAME = "sglang-deployment-example"
5+
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
66

77
# Get client secret and id from environment variables
88
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
99
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
1010
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
1111

1212
# DataCrunch client instance
13-
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET)
13+
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
1414

1515
# Get the deployment
1616
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
@@ -30,4 +30,4 @@
3030
) # wait for the response
3131

3232
# Print the response
33-
print(response.body)
33+
print(response.output())
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""Example script demonstrating deleting a deployment using the DataCrunch API.
2+
"""
3+
4+
import os
5+
from datacrunch import DataCrunchClient
6+
7+
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
8+
9+
# Get confidential values from environment variables
10+
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
11+
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
12+
13+
# Initialize client with inference key
14+
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET)
15+
16+
# Register signal handlers for cleanup
17+
datacrunch.containers.delete_deployment(DEPLOYMENT_NAME)
18+
print("Deployment deleted")

examples/containers/sglang_deployment_example.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
4545
HF_TOKEN = os.environ.get('HF_TOKEN')
4646

47-
4847
def wait_for_deployment_health(datacrunch_client: DataCrunchClient, deployment_name: str, max_attempts: int = 20, delay: int = 30) -> bool:
4948
"""Wait for deployment to reach healthy status.
5049
@@ -99,18 +98,18 @@ def graceful_shutdown(signum, frame) -> None:
9998

10099
try:
101100
# Get the inference API key
102-
inference_key = DATACRUNCH_INFERENCE_KEY
103-
if not inference_key:
104-
inference_key = input(
101+
datacrunch_inference_key = DATACRUNCH_INFERENCE_KEY
102+
if not datacrunch_inference_key:
103+
datacrunch_inference_key = input(
105104
"Enter your Inference API Key from the DataCrunch dashboard: ")
106105
else:
107106
print("Using Inference API Key from environment")
108107

109108
# Initialize client with inference key
110109
datacrunch = DataCrunchClient(
111-
DATACRUNCH_CLIENT_ID,
112-
DATACRUNCH_CLIENT_SECRET,
113-
inference_key=inference_key
110+
client_id=DATACRUNCH_CLIENT_ID,
111+
client_secret=DATACRUNCH_CLIENT_SECRET,
112+
inference_key=datacrunch_inference_key
114113
)
115114

116115
# Register signal handlers for cleanup

0 commit comments

Comments
 (0)