Skip to content

Commit 01c8d54

Browse files
committed
simplify
1 parent 8470219 commit 01c8d54

File tree

1 file changed

+153
-162
lines changed

1 file changed

+153
-162
lines changed

examples/containers/sglang_deployment_example.py

Lines changed: 153 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,6 @@
4343
INFERENCE_KEY = os.environ.get('INFERENCE_KEY')
4444
HF_TOKEN = os.environ.get('HF_TOKEN')
4545

46-
# DataCrunch client instance (global for graceful shutdown)
47-
datacrunch = None
48-
4946

5047
def wait_for_deployment_health(datacrunch_client: DataCrunchClient, deployment_name: str, max_attempts: int = 20, delay: int = 30) -> bool:
5148
"""Wait for deployment to reach healthy status.
@@ -99,174 +96,168 @@ def graceful_shutdown(signum, frame) -> None:
9996
sys.exit(0)
10097

10198

102-
def main() -> None:
103-
"""Main function demonstrating SGLang deployment."""
99+
try:
100+
# Get the inference API key
101+
inference_key = INFERENCE_KEY
102+
if not inference_key:
103+
inference_key = input(
104+
"Enter your Inference API Key from the DataCrunch dashboard: ")
105+
else:
106+
print("Using Inference API Key from environment")
107+
108+
# Initialize client with inference key
109+
datacrunch = DataCrunchClient(
110+
DATACRUNCH_CLIENT_ID,
111+
DATACRUNCH_CLIENT_SECRET,
112+
inference_key=inference_key
113+
)
114+
115+
# Register signal handlers for cleanup
116+
signal.signal(signal.SIGINT, graceful_shutdown)
117+
signal.signal(signal.SIGTERM, graceful_shutdown)
118+
119+
# Create a secret for the Hugging Face token
120+
print(f"Creating secret for Hugging Face token: {HF_SECRET_NAME}")
104121
try:
105-
# Get the inference API key
106-
inference_key = INFERENCE_KEY
107-
if not inference_key:
108-
inference_key = input(
109-
"Enter your Inference API Key from the DataCrunch dashboard: ")
122+
# Check if secret already exists
123+
existing_secrets = datacrunch.containers.get_secrets()
124+
secret_exists = any(
125+
secret.name == HF_SECRET_NAME for secret in existing_secrets)
126+
127+
if not secret_exists:
128+
# Check if HF_TOKEN is set; if not, prompt the user
129+
if not HF_TOKEN:
130+
HF_TOKEN = input(
131+
"Enter your Hugging Face token: ")
132+
datacrunch.containers.create_secret(
133+
HF_SECRET_NAME, HF_TOKEN)
134+
print(f"Secret '{HF_SECRET_NAME}' created successfully")
110135
else:
111-
print("Using Inference API Key from environment")
112-
113-
# Initialize client with inference key
114-
global datacrunch
115-
datacrunch = DataCrunchClient(
116-
DATACRUNCH_CLIENT_ID,
117-
DATACRUNCH_CLIENT_SECRET,
118-
inference_key=inference_key
119-
)
120-
121-
# Register signal handlers for cleanup
122-
signal.signal(signal.SIGINT, graceful_shutdown)
123-
signal.signal(signal.SIGTERM, graceful_shutdown)
124-
125-
# Create a secret for the Hugging Face token
126-
print(f"Creating secret for Hugging Face token: {HF_SECRET_NAME}")
127-
try:
128-
# Check if secret already exists
129-
existing_secrets = datacrunch.containers.get_secrets()
130-
secret_exists = any(
131-
secret.name == HF_SECRET_NAME for secret in existing_secrets)
132-
133-
if not secret_exists:
134-
# Check if HF_TOKEN is set; if not, prompt the user
135-
if not HF_TOKEN:
136-
HF_TOKEN = input(
137-
"Enter your Hugging Face token: ")
138-
datacrunch.containers.create_secret(
139-
HF_SECRET_NAME, HF_TOKEN)
140-
print(f"Secret '{HF_SECRET_NAME}' created successfully")
141-
else:
142-
print(
143-
f"Secret '{HF_SECRET_NAME}' already exists, using existing secret")
144-
except APIException as e:
145-
print(f"Error creating secret: {e}")
146-
return
147-
148-
# Create container configuration
149-
container = Container(
150-
image=IMAGE_URL,
151-
exposed_port=30000,
152-
healthcheck=HealthcheckSettings(
136+
print(
137+
f"Secret '{HF_SECRET_NAME}' already exists, using existing secret")
138+
except APIException as e:
139+
print(f"Error creating secret: {e}")
140+
sys.exit(1)
141+
142+
# Create container configuration
143+
container = Container(
144+
image=IMAGE_URL,
145+
exposed_port=30000,
146+
healthcheck=HealthcheckSettings(
147+
enabled=True,
148+
port=30000,
149+
path="/health"
150+
),
151+
entrypoint_overrides=EntrypointOverridesSettings(
152+
enabled=True,
153+
cmd=["python3", "-m", "sglang.launch_server", "--model-path",
154+
MODEL_PATH, "--host", "0.0.0.0", "--port", "30000"]
155+
),
156+
env=[
157+
EnvVar(
158+
name="HF_TOKEN",
159+
value_or_reference_to_secret=HF_SECRET_NAME,
160+
type=EnvVarType.SECRET
161+
)
162+
]
163+
)
164+
165+
# Create scaling configuration - default values
166+
scaling_options = ScalingOptions(
167+
min_replica_count=1,
168+
max_replica_count=2,
169+
scale_down_policy=ScalingPolicy(delay_seconds=300),
170+
scale_up_policy=ScalingPolicy(delay_seconds=300),
171+
queue_message_ttl_seconds=500,
172+
concurrent_requests_per_replica=1,
173+
scaling_triggers=ScalingTriggers(
174+
queue_load=QueueLoadScalingTrigger(threshold=1),
175+
cpu_utilization=UtilizationScalingTrigger(
153176
enabled=True,
154-
port=30000,
155-
path="/health"
177+
threshold=90
156178
),
157-
entrypoint_overrides=EntrypointOverridesSettings(
179+
gpu_utilization=UtilizationScalingTrigger(
158180
enabled=True,
159-
cmd=["python3", "-m", "sglang.launch_server", "--model-path",
160-
MODEL_PATH, "--host", "0.0.0.0", "--port", "30000"]
161-
),
162-
env=[
163-
EnvVar(
164-
name="HF_TOKEN",
165-
value_or_reference_to_secret=HF_SECRET_NAME,
166-
type=EnvVarType.SECRET
167-
)
168-
]
169-
)
170-
171-
# Create scaling configuration - default values
172-
scaling_options = ScalingOptions(
173-
min_replica_count=1,
174-
max_replica_count=2,
175-
scale_down_policy=ScalingPolicy(delay_seconds=300),
176-
scale_up_policy=ScalingPolicy(delay_seconds=300),
177-
queue_message_ttl_seconds=500,
178-
concurrent_requests_per_replica=1,
179-
scaling_triggers=ScalingTriggers(
180-
queue_load=QueueLoadScalingTrigger(threshold=1),
181-
cpu_utilization=UtilizationScalingTrigger(
182-
enabled=True,
183-
threshold=90
184-
),
185-
gpu_utilization=UtilizationScalingTrigger(
186-
enabled=True,
187-
threshold=90
188-
)
181+
threshold=90
189182
)
190183
)
184+
)
185+
186+
# Create registry and compute settings
187+
registry_settings = ContainerRegistrySettings(is_private=False)
188+
# For a 7B model, General Compute (24GB VRAM) is sufficient
189+
compute = ComputeResource(name="General Compute", size=1)
190+
191+
# Create deployment object
192+
deployment = Deployment(
193+
name=DEPLOYMENT_NAME,
194+
container_registry_settings=registry_settings,
195+
containers=[container],
196+
compute=compute,
197+
scaling=scaling_options,
198+
is_spot=False
199+
)
200+
201+
# Create the deployment
202+
created_deployment = datacrunch.containers.create_deployment(
203+
deployment)
204+
print(f"Created deployment: {created_deployment.name}")
205+
print("This will take several minutes while the model is downloaded and the server starts...")
206+
207+
# Wait for deployment to be healthy
208+
if not wait_for_deployment_health(datacrunch, DEPLOYMENT_NAME):
209+
print("Deployment health check failed")
210+
cleanup_resources(datacrunch)
211+
sys.exit(1)
191212

192-
# Create registry and compute settings
193-
registry_settings = ContainerRegistrySettings(is_private=False)
194-
# For a 7B model, General Compute (24GB VRAM) is sufficient
195-
compute = ComputeResource(name="General Compute", size=1)
196-
197-
# Create deployment object
198-
deployment = Deployment(
199-
name=DEPLOYMENT_NAME,
200-
container_registry_settings=registry_settings,
201-
containers=[container],
202-
compute=compute,
203-
scaling=scaling_options,
204-
is_spot=False
213+
# Test the deployment with a simple request
214+
print("\nTesting the deployment...")
215+
try:
216+
# Test model info endpoint
217+
print(
218+
"Testing /get_model_info endpoint by making a sync GET request to the SGLang server...")
219+
model_info_response = created_deployment._inference_client.get(
220+
path="/get_model_info")
221+
print("Model info endpoint is working!")
222+
print(f"Response: {model_info_response}")
223+
224+
# Test completions endpoint
225+
print("\nTesting completions API...")
226+
completions_data = {
227+
"model": MODEL_PATH,
228+
"prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
229+
"max_tokens": 128,
230+
"temperature": 0.7,
231+
"top_p": 0.9,
232+
}
233+
234+
# Make a sync inference request to the SGLang server
235+
completions_response = created_deployment.run_sync(
236+
completions_data,
237+
path="/v1/completions",
205238
)
206-
207-
# Create the deployment
208-
created_deployment = datacrunch.containers.create_deployment(
209-
deployment)
210-
print(f"Created deployment: {created_deployment.name}")
211-
print("This will take several minutes while the model is downloaded and the server starts...")
212-
213-
# Wait for deployment to be healthy
214-
if not wait_for_deployment_health(datacrunch, DEPLOYMENT_NAME):
215-
print("Deployment health check failed")
216-
cleanup_resources(datacrunch)
217-
return
218-
219-
# Test the deployment with a simple request
220-
print("\nTesting the deployment...")
221-
try:
222-
# Test model info endpoint
223-
print(
224-
"Testing /get_model_info endpoint by making a sync GET request to the SGLang server...")
225-
model_info_response = created_deployment._inference_client.get(
226-
path="/get_model_info")
227-
print("Model info endpoint is working!")
228-
print(f"Response: {model_info_response}")
229-
230-
# Test completions endpoint
231-
print("\nTesting completions API...")
232-
completions_data = {
233-
"model": MODEL_PATH,
234-
"prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
235-
"max_tokens": 128,
236-
"temperature": 0.7,
237-
"top_p": 0.9,
238-
}
239-
240-
# Make a sync inference request to the SGLang server
241-
completions_response = created_deployment.run_sync(
242-
completions_data,
243-
path="/v1/completions",
244-
)
245-
print("Completions API is working!")
246-
print(f"Response: {completions_response}")
247-
248-
except Exception as e:
249-
print(f"Error testing deployment: {e}")
250-
251-
# Cleanup or keep running based on user input
252-
keep_running = input(
253-
"\nDo you want to keep the deployment running? (y/n): ")
254-
if keep_running.lower() != 'y':
255-
cleanup_resources(datacrunch)
256-
else:
257-
print(
258-
f"Deployment {DEPLOYMENT_NAME} is running. Don't forget to delete it when finished.")
259-
print("You can delete it from the DataCrunch dashboard or by running:")
260-
print(f"datacrunch.containers.delete('{DEPLOYMENT_NAME}')")
239+
print("Completions API is working!")
240+
print(f"Response: {completions_response}")
261241

262242
except Exception as e:
263-
print(f"Unexpected error: {e}")
264-
# Attempt cleanup even if there was an error
265-
try:
266-
cleanup_resources(datacrunch)
267-
except Exception as cleanup_error:
268-
print(f"Error during cleanup after failure: {cleanup_error}")
269-
243+
print(f"Error testing deployment: {e}")
270244

271-
if __name__ == "__main__":
272-
main()
245+
# Cleanup or keep running based on user input
246+
keep_running = input(
247+
"\nDo you want to keep the deployment running? (y/n): ")
248+
if keep_running.lower() != 'y':
249+
cleanup_resources(datacrunch)
250+
else:
251+
print(
252+
f"Deployment {DEPLOYMENT_NAME} is running. Don't forget to delete it when finished.")
253+
print("You can delete it from the DataCrunch dashboard or by running:")
254+
print(f"datacrunch.containers.delete('{DEPLOYMENT_NAME}')")
255+
256+
except Exception as e:
257+
print(f"Unexpected error: {e}")
258+
# Attempt cleanup even if there was an error
259+
try:
260+
cleanup_resources(datacrunch)
261+
except Exception as cleanup_error:
262+
print(f"Error during cleanup after failure: {cleanup_error}")
263+
sys.exit(1)

0 commit comments

Comments
 (0)