Skip to content

Commit 57ef834

Browse files
committed
Merge branch 'master' into refactor/use-dataclasses
2 parents fea0768 + 869e37e commit 57ef834

File tree

11 files changed

+137
-7
lines changed

11 files changed

+137
-7
lines changed

CHANGELOG.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
Changelog
22
=========
33

4+
* Added example for calling the inference endpoint with a minimal client
5+
* Added missing doc generation for inference examples
46
* Refactor: use dataclasses and google docstring style in instances.py
57

68
v1.10.0 (2025-04-17)

datacrunch/InferenceClient/inference_client.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,19 @@
66
from urllib.parse import urlparse
77
from enum import Enum
88

9+
910
class InferenceClientError(Exception):
1011
"""Base exception for InferenceClient errors."""
1112
pass
1213

14+
1315
class AsyncStatus(int, Enum):
1416
Initialized = 0
1517
Queue = 1
1618
Inference = 2
1719
Completed = 3
1820

21+
1922
@dataclass_json(undefined=Undefined.EXCLUDE)
2023
@dataclass
2124
class InferenceResponse:
@@ -222,6 +225,22 @@ def _make_request(self, method: str, path: str, **kwargs) -> requests.Response:
222225
raise InferenceClientError(f"Request to {path} failed: {str(e)}")
223226

224227
def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", stream: bool = False):
228+
"""Make a synchronous request to the inference endpoint.
229+
230+
Args:
231+
data: The data payload to send with the request
232+
path: API endpoint path. Defaults to empty string.
233+
timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
234+
headers: Optional headers to include in the request
235+
http_method: HTTP method to use. Defaults to "POST".
236+
stream: Whether to stream the response. Defaults to False.
237+
238+
Returns:
239+
InferenceResponse: Object containing the response data.
240+
241+
Raises:
242+
InferenceClientError: If the request fails
243+
"""
225244
response = self._make_request(
226245
http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers, stream=stream)
227246

@@ -233,6 +252,23 @@ def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int =
233252
)
234253

235254
def run(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", no_response: bool = False):
255+
"""Make an asynchronous request to the inference endpoint.
256+
257+
Args:
258+
data: The data payload to send with the request
259+
path: API endpoint path. Defaults to empty string.
260+
timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
261+
headers: Optional headers to include in the request
262+
http_method: HTTP method to use. Defaults to "POST".
263+
no_response: If True, don't wait for response. Defaults to False.
264+
265+
Returns:
266+
AsyncInferenceExecution: Object to track the async execution status.
267+
If no_response is True, returns None.
268+
269+
Raises:
270+
InferenceClientError: If the request fails
271+
"""
236272
# Add relevant headers to the request, to indicate that the request is async
237273
headers = headers or {}
238274
if no_response:

docs/source/examples/containers/index.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@ This section contains examples demonstrating how to work with containers in Data
77
:maxdepth: 1
88
:caption: Contents:
99

10-
compute_resources
1110
deployments
11+
compute_resources
1212
environment_variables
1313
registry_credentials
1414
secrets
1515
sglang
16-
scaling
16+
scaling
17+
inference_async
18+
inference_sync
19+
inference_minimal
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Calling the inference endpoint in async mode
2+
============================================
3+
4+
This example demonstrates how to call the inference endpoint in async mode.
5+
6+
.. literalinclude:: ../../../../examples/containers/calling_the_inference_endpoint_in_async_mode.py
7+
:language: python
8+
:caption: Calling the inference endpoint in async mode
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Calling the inference endpoint using a minimal client
2+
=====================================================
3+
4+
This example demonstrates how to call the inference endpoint using a minimal client that uses only an inference key (no client credentials needed).
5+
6+
.. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key.py
7+
:language: python
8+
:caption: Calling the inference endpoint using a minimal client
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Calling the inference async endpoint using a minimal client
2+
===========================================================
3+
4+
This example demonstrates how to call the inference async endpoint using a minimal client that uses only an inference key (no client credentials needed).
5+
6+
.. literalinclude:: ../../../../examples/containers/calling_the_endpoint_with_inference_key_async.py
7+
:language: python
8+
:caption: Calling the inference async endpoint using a minimal client

docs/source/examples/containers/sglang.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ This example demonstrates how to deploy and manage SGLang applications in DataCr
55

66
.. literalinclude:: ../../../../examples/containers/sglang_deployment_example.py
77
:language: python
8-
:caption: SGLang Deployment
8+
:caption: SGLang Deployment Example

examples/containers/calling_the_endpoint_asynchronously.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
from datacrunch.InferenceClient.inference_client import AsyncStatus
55

66
# Configuration - replace with your deployment name
7-
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
7+
DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
88

99
# Get client secret and id from environment variables
1010
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
1111
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
1212
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
1313

1414
# DataCrunch client instance
15-
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
15+
datacrunch = DataCrunchClient(
16+
DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
1617

1718
# Get the deployment
1819
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)

examples/containers/calling_the_endpoint_synchronously.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@
22
from datacrunch import DataCrunchClient
33

44
# Configuration - replace with your deployment name
5-
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
5+
DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
66

77
# Get client secret and id from environment variables
88
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
99
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
1010
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
1111

1212
# DataCrunch client instance
13-
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
13+
datacrunch = DataCrunchClient(
14+
DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
1415

1516
# Get the deployment
1617
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import os
2+
from datacrunch.InferenceClient import InferenceClient
3+
4+
# Get inference key and endpoint base url from environment variables
5+
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
6+
DATACRUNCH_ENDPOINT_BASE_URL = os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')
7+
8+
# Create an inference client that uses only the inference key, without client credentials
9+
inference_client = InferenceClient(
10+
inference_key=DATACRUNCH_INFERENCE_KEY,
11+
endpoint_base_url=DATACRUNCH_ENDPOINT_BASE_URL
12+
)
13+
14+
# Make a synchronous request to the endpoint.
15+
# This example demonstrates calling an SGLang deployment which serves LLMs using an OpenAI-compatible API format
16+
data = {
17+
"model": "deepseek-ai/deepseek-llm-7b-chat",
18+
"prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
19+
"max_tokens": 128,
20+
"temperature": 0.7,
21+
"top_p": 0.9
22+
}
23+
24+
response = inference_client.run_sync(data=data, path='v1/completions')
25+
26+
# Print the response
27+
print(response.output())

0 commit comments

Comments
 (0)