Skip to content

Commit e7597be

Browse files
Onprem Compatibility Change
1 parent fdeee61 commit e7597be

29 files changed

Lines changed: 924 additions & 179 deletions

charts/model-engine/templates/_helpers.tpl

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,10 @@ env:
256256
- name: ABS_CONTAINER_NAME
257257
value: {{ .Values.azure.abs_container_name }}
258258
{{- end }}
259+
{{- if .Values.s3EndpointUrl }}
260+
- name: S3_ENDPOINT_URL
261+
value: {{ .Values.s3EndpointUrl | quote }}
262+
{{- end }}
259263
{{- end }}
260264

261265
{{- define "modelEngine.syncForwarderTemplateEnv" -}}
@@ -342,9 +346,27 @@ env:
342346
value: "/workspace/model-engine/model_engine_server/core/configs/config.yaml"
343347
{{- end }}
344348
- name: CELERY_ELASTICACHE_ENABLED
345-
value: "true"
349+
value: {{ .Values.celeryElasticacheEnabled | default true | quote }}
346350
- name: LAUNCH_SERVICE_TEMPLATE_FOLDER
347351
value: "/workspace/model-engine/model_engine_server/infra/gateways/resources/templates"
352+
{{- if .Values.s3EndpointUrl }}
353+
- name: S3_ENDPOINT_URL
354+
value: {{ .Values.s3EndpointUrl | quote }}
355+
{{- end }}
356+
{{- if .Values.redisHost }}
357+
- name: REDIS_HOST
358+
value: {{ .Values.redisHost | quote }}
359+
- name: REDIS_PORT
360+
value: {{ .Values.redisPort | default "6379" | quote }}
361+
{{- end }}
362+
{{- if .Values.celeryBrokerUrl }}
363+
- name: CELERY_BROKER_URL
364+
value: {{ .Values.celeryBrokerUrl | quote }}
365+
{{- end }}
366+
{{- if .Values.celeryResultBackend }}
367+
- name: CELERY_RESULT_BACKEND
368+
value: {{ .Values.celeryResultBackend | quote }}
369+
{{- end }}
348370
{{- if .Values.redis.auth}}
349371
- name: REDIS_AUTH_TOKEN
350372
value: {{ .Values.redis.auth }}

model-engine/model_engine_server/api/dependencies.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -227,26 +227,24 @@ def _get_external_interfaces(
227227
queue_delegate: QueueEndpointResourceDelegate
228228
if CIRCLECI:
229229
queue_delegate = FakeQueueEndpointResourceDelegate()
230-
elif infra_config().cloud_provider == "azure":
231-
queue_delegate = ASBQueueEndpointResourceDelegate()
232230
elif infra_config().cloud_provider == "onprem":
233231
queue_delegate = OnPremQueueEndpointResourceDelegate()
232+
elif infra_config().cloud_provider == "azure":
233+
queue_delegate = ASBQueueEndpointResourceDelegate()
234234
else:
235235
queue_delegate = SQSQueueEndpointResourceDelegate(
236236
sqs_profile=os.getenv("SQS_PROFILE", hmi_config.sqs_profile)
237237
)
238238

239239
inference_task_queue_gateway: TaskQueueGateway
240240
infra_task_queue_gateway: TaskQueueGateway
241-
if CIRCLECI:
241+
if CIRCLECI or infra_config().cloud_provider == "onprem":
242+
# On-prem uses Redis-based task queues
242243
inference_task_queue_gateway = redis_24h_task_queue_gateway
243244
infra_task_queue_gateway = redis_task_queue_gateway
244245
elif infra_config().cloud_provider == "azure":
245246
inference_task_queue_gateway = servicebus_task_queue_gateway
246247
infra_task_queue_gateway = servicebus_task_queue_gateway
247-
elif infra_config().cloud_provider == "onprem":
248-
inference_task_queue_gateway = redis_task_queue_gateway
249-
infra_task_queue_gateway = redis_task_queue_gateway
250248
elif infra_config().celery_broker_type_redis:
251249
inference_task_queue_gateway = redis_task_queue_gateway
252250
infra_task_queue_gateway = redis_task_queue_gateway
@@ -288,10 +286,8 @@ def _get_external_interfaces(
288286
if infra_config().cloud_provider == "azure":
289287
filesystem_gateway = ABSFilesystemGateway()
290288
llm_artifact_gateway = ABSLLMArtifactGateway()
291-
elif infra_config().cloud_provider == "onprem":
292-
filesystem_gateway = S3FilesystemGateway() # Uses MinIO via s3_utils
293-
llm_artifact_gateway = S3LLMArtifactGateway() # Uses MinIO via s3_utils
294289
else:
290+
# AWS uses S3, on-prem uses MinIO (S3-compatible)
295291
filesystem_gateway = S3FilesystemGateway()
296292
llm_artifact_gateway = S3LLMArtifactGateway()
297293
model_endpoints_schema_gateway = LiveModelEndpointsSchemaGateway(
@@ -341,10 +337,8 @@ def _get_external_interfaces(
341337
if infra_config().cloud_provider == "azure":
342338
llm_fine_tune_repository = ABSFileLLMFineTuneRepository(file_path=file_path)
343339
llm_fine_tune_events_repository = ABSFileLLMFineTuneEventsRepository()
344-
elif infra_config().cloud_provider == "onprem":
345-
llm_fine_tune_repository = S3FileLLMFineTuneRepository(file_path=file_path) # Uses MinIO
346-
llm_fine_tune_events_repository = S3FileLLMFineTuneEventsRepository() # Uses MinIO
347340
else:
341+
# AWS uses S3, on-prem uses MinIO (S3-compatible)
348342
llm_fine_tune_repository = S3FileLLMFineTuneRepository(file_path=file_path)
349343
llm_fine_tune_events_repository = S3FileLLMFineTuneEventsRepository()
350344
llm_fine_tuning_service = DockerImageBatchJobLLMFineTuningService(
@@ -360,18 +354,17 @@ def _get_external_interfaces(
360354
file_storage_gateway: FileStorageGateway
361355
if infra_config().cloud_provider == "azure":
362356
file_storage_gateway = ABSFileStorageGateway()
363-
elif infra_config().cloud_provider == "onprem":
364-
file_storage_gateway = S3FileStorageGateway() # Uses MinIO via s3_utils
365357
else:
358+
# AWS uses S3, on-prem uses MinIO (S3-compatible)
366359
file_storage_gateway = S3FileStorageGateway()
367360

368361
docker_repository: DockerRepository
369362
if CIRCLECI:
370363
docker_repository = FakeDockerRepository()
371-
elif infra_config().cloud_provider == "azure":
372-
docker_repository = ACRDockerRepository()
373364
elif infra_config().cloud_provider == "onprem":
374365
docker_repository = OnPremDockerRepository()
366+
elif infra_config().cloud_provider == "azure":
367+
docker_repository = ACRDockerRepository()
375368
else:
376369
docker_repository = ECRDockerRepository()
377370

model-engine/model_engine_server/common/config.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,13 @@ class HostedModelInferenceServiceConfig:
7070
user_inference_tensorflow_repository: str
7171
docker_image_layer_cache_repository: str
7272
sensitive_log_mode: bool
73-
# Exactly one of the following three must be specified
73+
# Exactly one of the following must be specified for Redis cache
7474
cache_redis_aws_url: Optional[str] = None # also using this to store sync autoscaling metrics
7575
cache_redis_azure_host: Optional[str] = None
7676
cache_redis_aws_secret_name: Optional[str] = (
7777
None # Not an env var because the redis cache info is already here
7878
)
79+
cache_redis_onprem_url: Optional[str] = None # For on-prem Redis (e.g., redis://redis:6379/0)
7980
sglang_repository: Optional[str] = None
8081

8182
@classmethod
@@ -90,8 +91,13 @@ def from_yaml(cls, yaml_path):
9091

9192
@property
9293
def cache_redis_url(self) -> str:
94+
# On-prem Redis support (explicit URL, no cloud provider dependency)
95+
if self.cache_redis_onprem_url:
96+
return self.cache_redis_onprem_url
97+
9398
cloud_provider = infra_config().cloud_provider
9499

100+
# On-prem: support REDIS_HOST env var fallback
95101
if cloud_provider == "onprem":
96102
if self.cache_redis_aws_url:
97103
logger.info("On-prem deployment using cache_redis_aws_url")

model-engine/model_engine_server/common/io.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,19 @@
33
import os
44
from typing import Any
55

6+
import boto3
67
import smart_open
78
from model_engine_server.core.config import infra_config
89

910

1011
def open_wrapper(uri: str, mode: str = "rt", **kwargs):
1112
client: Any
13+
cloud_provider: str
14+
# This follows the 5.1.0 smart_open API
1215
try:
1316
cloud_provider = infra_config().cloud_provider
1417
except Exception:
1518
cloud_provider = "aws"
16-
1719
if cloud_provider == "azure":
1820
from azure.identity import DefaultAzureCredential
1921
from azure.storage.blob import BlobServiceClient
@@ -23,9 +25,9 @@ def open_wrapper(uri: str, mode: str = "rt", **kwargs):
2325
DefaultAzureCredential(),
2426
)
2527
else:
26-
from model_engine_server.infra.gateways.s3_utils import get_s3_client
27-
28-
client = get_s3_client(kwargs)
28+
profile_name = kwargs.get("aws_profile", os.getenv("AWS_PROFILE"))
29+
session = boto3.Session(profile_name=profile_name)
30+
client = session.client("s3")
2931

3032
transport_params = {"client": client}
3133
return smart_open.open(uri, mode, transport_params=transport_params)

model-engine/model_engine_server/core/aws/roles.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,21 @@ def session(role: Optional[str], session_type: SessionT = Session) -> SessionT:
119119
120120
:param:`session_type` defines the type of session to return. Most users will use
121121
the default boto3 type. Some users required a special type (e.g aioboto3 session).
122+
123+
For on-prem deployments without AWS profiles, pass role=None or role=""
124+
to use default credentials from environment variables (AWS_ACCESS_KEY_ID, etc).
122125
"""
123126
# Do not assume roles in CIRCLECI
124127
if os.getenv("CIRCLECI"):
125128
logger.warning(f"In circleci, not assuming role (ignoring: {role})")
126129
role = None
127-
sesh: SessionT = session_type(profile_name=role)
130+
131+
# Use profile-based auth only if role is specified
132+
# For on-prem with MinIO, role will be None or empty - use env var credentials
133+
if role:
134+
sesh: SessionT = session_type(profile_name=role)
135+
else:
136+
sesh: SessionT = session_type() # Uses default credential chain (env vars)
128137
return sesh
129138

130139

model-engine/model_engine_server/core/aws/storage_client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
import time
23
from typing import IO, Callable, Iterable, Optional, Sequence
34

@@ -20,6 +21,10 @@
2021

2122

2223
def sync_storage_client(**kwargs) -> BaseClient:
24+
# Support for MinIO/on-prem S3-compatible storage
25+
endpoint_url = os.getenv("S3_ENDPOINT_URL")
26+
if endpoint_url and "endpoint_url" not in kwargs:
27+
kwargs["endpoint_url"] = endpoint_url
2328
return session(infra_config().profile_ml_worker).client("s3", **kwargs) # type: ignore
2429

2530

0 commit comments

Comments
 (0)