Skip to content

Commit 57ef834

Browse files
committed
Merge branch 'master' into refactor/use-dataclasses
2 parents fea0768 + 869e37e commit 57ef834

File tree

11 files changed

+137
-7
lines changed

11 files changed

+137
-7
lines changed

CHANGELOG.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
Changelog
22
=========
33

4+
* Added example for calling the inference endpoint with a minimal client
5+
* Added missing doc generation for inference examples
46
* Refactor: use dataclasses and google docstring style in instances.py
57

68
v1.10.0 (2025-04-17)

datacrunch/InferenceClient/inference_client.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,19 @@
66
from urllib.parse import urlparse
77
from enum import Enum
88

9+
910
class InferenceClientError(Exception):
1011
"""Base exception for InferenceClient errors."""
1112
pass
1213

14+
1315
class AsyncStatus(int, Enum):
1416
Initialized = 0
1517
Queue = 1
1618
Inference = 2
1719
Completed = 3
1820

21+
1922
@dataclass_json(undefined=Undefined.EXCLUDE)
2023
@dataclass
2124
class InferenceResponse:
@@ -222,6 +225,22 @@ def _make_request(self, method: str, path: str, **kwargs) -> requests.Response:
222225
raise InferenceClientError(f"Request to {path} failed: {str(e)}")
223226

224227
def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", stream: bool = False):
228+
"""Make a synchronous request to the inference endpoint.
229+
230+
Args:
231+
data: The data payload to send with the request
232+
path: API endpoint path. Defaults to empty string.
233+
timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
234+
headers: Optional headers to include in the request
235+
http_method: HTTP method to use. Defaults to "POST".
236+
stream: Whether to stream the response. Defaults to False.
237+
238+
Returns:
239+
InferenceResponse: Object containing the response data.
240+
241+
Raises:
242+
InferenceClientError: If the request fails
243+
"""
225244
response = self._make_request(
226245
http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers, stream=stream)
227246

@@ -233,6 +252,23 @@ def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int =
233252
)
234253

235254
def run(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", no_response: bool = False):
255+
"""Make an asynchronous request to the inference endpoint.
256+
257+
Args:
258+
data: The data payload to send with the request
259+
path: API endpoint path. Defaults to empty string.
260+
timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
261+
headers: Optional headers to include in the request
262+
http_method: HTTP method to use. Defaults to "POST".
263+
no_response: If True, don't wait for response. Defaults to False.
264+
265+
Returns:
266+
AsyncInferenceExecution: Object to track the async execution status.
267+
If no_response is True, returns None.
268+
269+
Raises:
270+
InferenceClientError: If the request fails
271+
"""
236272
# Add relevant headers to the request, to indicate that the request is async
237273
headers = headers or {}
238274
if no_response:

docs/source/examples/containers/index.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@ This section contains examples demonstrating how to work with containers in Data
77
:maxdepth: 1
88
:caption: Contents:
99

10-
compute_resources
1110
deployments
11+
compute_resources
1212
environment_variables
1313
registry_credentials
1414
secrets
1515
sglang
16-
scaling
16+
scaling
17+
inference_async
18+
inference_sync
19+
inference_minimal
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Calling the inference endpoint in async mode
2+
============================================
3+
4+
This example demonstrates how to call the inference endpoint in async mode.
5+
6+
.. literalinclude:: ../../../../examples/containers/calling_the_inference_endpoint_in_async_mode.py
7+
:language: python
8+
:caption: Calling the inference endpoint in async mode
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Calling the inference endpoint using a minimal client
2+
=====================================================
3+
4+
This example demonstrates how to call the inference endpoint using a minimal client that uses only an inference key (no client credentials needed).
5+
6+
.. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key.py
7+
:language: python
8+
:caption: Calling the inference endpoint using a minimal client
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Calling the inference async endpoint using a minimal client
2+
===========================================================
3+
4+
This example demonstrates how to call the inference async endpoint using a minimal client that uses only an inference key (no client credentials needed).
5+
6+
.. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key_async.py
7+
:language: python
8+
:caption: Calling the inference async endpoint using a minimal client

docs/source/examples/containers/sglang.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ This example demonstrates how to deploy and manage SGLang applications in DataCr
55

66
.. literalinclude:: ../../../../examples/containers/sglang_deployment_example.py
77
:language: python
8-
:caption: SGLang Deployment
8+
:caption: SGLang Deployment Example

examples/containers/calling_the_endpoint_asynchronously.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
from datacrunch.InferenceClient.inference_client import AsyncStatus
55

66
# Configuration - replace with your deployment name
7-
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
7+
DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
88

99
# Get client secret and id from environment variables
1010
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
1111
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
1212
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
1313

1414
# DataCrunch client instance
15-
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
15+
datacrunch = DataCrunchClient(
16+
DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
1617

1718
# Get the deployment
1819
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)

examples/containers/calling_the_endpoint_synchronously.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@
22
from datacrunch import DataCrunchClient
33

44
# Configuration - replace with your deployment name
5-
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
5+
DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
66

77
# Get client secret and id from environment variables
88
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
99
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
1010
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
1111

1212
# DataCrunch client instance
13-
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
13+
datacrunch = DataCrunchClient(
14+
DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
1415

1516
# Get the deployment
1617
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import os
2+
from datacrunch.InferenceClient import InferenceClient
3+
4+
# Get inference key and endpoint base url from environment variables
5+
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
6+
DATACRUNCH_ENDPOINT_BASE_URL = os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
7+
8+
# Create an inference client that uses only the inference key, without client credentials
9+
inference_client = InferenceClient(
10+
inference_key=DATACRUNCH_INFERENCE_KEY,
11+
endpoint_base_url=DATACRUNCH_ENDPOINT_BASE_URL
12+
)
13+
14+
# Make a synchronous request to the endpoint.
15+
# This example demonstrates calling an SGLang deployment which serves LLMs using an OpenAI-compatible API format
16+
data = {
17+
"model": "deepseek-ai/deepseek-llm-7b-chat",
18+
"prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
19+
"max_tokens": 128,
20+
"temperature": 0.7,
21+
"top_p": 0.9
22+
}
23+
24+
response = inference_client.run_sync(data=data, path='v1/completions')
25+
26+
# Print the response
27+
print(response.output())

0 commit comments

Comments
 (0)