Skip to content

Commit 1e5a96f

Browse files
Onprem Compatibility Change
1 parent fdeee61 commit 1e5a96f

29 files changed

Lines changed: 916 additions & 190 deletions

charts/model-engine/templates/_helpers.tpl

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,10 @@ env:
256256
- name: ABS_CONTAINER_NAME
257257
value: {{ .Values.azure.abs_container_name }}
258258
{{- end }}
259+
{{- if .Values.s3EndpointUrl }}
260+
- name: S3_ENDPOINT_URL
261+
value: {{ .Values.s3EndpointUrl | quote }}
262+
{{- end }}
259263
{{- end }}
260264

261265
{{- define "modelEngine.syncForwarderTemplateEnv" -}}
@@ -342,9 +346,27 @@ env:
342346
value: "/workspace/model-engine/model_engine_server/core/configs/config.yaml"
343347
{{- end }}
344348
- name: CELERY_ELASTICACHE_ENABLED
345-
value: "true"
349+
value: {{ .Values.celeryElasticacheEnabled | default true | quote }}
346350
- name: LAUNCH_SERVICE_TEMPLATE_FOLDER
347351
value: "/workspace/model-engine/model_engine_server/infra/gateways/resources/templates"
352+
{{- if .Values.s3EndpointUrl }}
353+
- name: S3_ENDPOINT_URL
354+
value: {{ .Values.s3EndpointUrl | quote }}
355+
{{- end }}
356+
{{- if .Values.redisHost }}
357+
- name: REDIS_HOST
358+
value: {{ .Values.redisHost | quote }}
359+
- name: REDIS_PORT
360+
value: {{ .Values.redisPort | default "6379" | quote }}
361+
{{- end }}
362+
{{- if .Values.celeryBrokerUrl }}
363+
- name: CELERY_BROKER_URL
364+
value: {{ .Values.celeryBrokerUrl | quote }}
365+
{{- end }}
366+
{{- if .Values.celeryResultBackend }}
367+
- name: CELERY_RESULT_BACKEND
368+
value: {{ .Values.celeryResultBackend | quote }}
369+
{{- end }}
348370
{{- if .Values.redis.auth}}
349371
- name: REDIS_AUTH_TOKEN
350372
value: {{ .Values.redis.auth }}

model-engine/model_engine_server/api/dependencies.py

Lines changed: 9 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,6 @@
9494
from model_engine_server.infra.gateways.resources.live_endpoint_resource_gateway import (
9595
LiveEndpointResourceGateway,
9696
)
97-
from model_engine_server.infra.gateways.resources.onprem_queue_endpoint_resource_delegate import (
98-
OnPremQueueEndpointResourceDelegate,
99-
)
10097
from model_engine_server.infra.gateways.resources.queue_endpoint_resource_delegate import (
10198
QueueEndpointResourceDelegate,
10299
)
@@ -117,7 +114,6 @@
117114
FakeDockerRepository,
118115
LiveTokenizerRepository,
119116
LLMFineTuneRepository,
120-
OnPremDockerRepository,
121117
RedisModelEndpointCacheRepository,
122118
S3FileLLMFineTuneEventsRepository,
123119
S3FileLLMFineTuneRepository,
@@ -225,28 +221,25 @@ def _get_external_interfaces(
225221
)
226222

227223
queue_delegate: QueueEndpointResourceDelegate
228-
if CIRCLECI:
224+
if CIRCLECI or infra_config().cloud_provider == "onprem":
225+
# On-prem uses fake queue delegate (no SQS/ServiceBus)
229226
queue_delegate = FakeQueueEndpointResourceDelegate()
230227
elif infra_config().cloud_provider == "azure":
231228
queue_delegate = ASBQueueEndpointResourceDelegate()
232-
elif infra_config().cloud_provider == "onprem":
233-
queue_delegate = OnPremQueueEndpointResourceDelegate()
234229
else:
235230
queue_delegate = SQSQueueEndpointResourceDelegate(
236231
sqs_profile=os.getenv("SQS_PROFILE", hmi_config.sqs_profile)
237232
)
238233

239234
inference_task_queue_gateway: TaskQueueGateway
240235
infra_task_queue_gateway: TaskQueueGateway
241-
if CIRCLECI:
236+
if CIRCLECI or infra_config().cloud_provider == "onprem":
237+
# On-prem uses Redis-based task queues
242238
inference_task_queue_gateway = redis_24h_task_queue_gateway
243239
infra_task_queue_gateway = redis_task_queue_gateway
244240
elif infra_config().cloud_provider == "azure":
245241
inference_task_queue_gateway = servicebus_task_queue_gateway
246242
infra_task_queue_gateway = servicebus_task_queue_gateway
247-
elif infra_config().cloud_provider == "onprem":
248-
inference_task_queue_gateway = redis_task_queue_gateway
249-
infra_task_queue_gateway = redis_task_queue_gateway
250243
elif infra_config().celery_broker_type_redis:
251244
inference_task_queue_gateway = redis_task_queue_gateway
252245
infra_task_queue_gateway = redis_task_queue_gateway
@@ -288,10 +281,8 @@ def _get_external_interfaces(
288281
if infra_config().cloud_provider == "azure":
289282
filesystem_gateway = ABSFilesystemGateway()
290283
llm_artifact_gateway = ABSLLMArtifactGateway()
291-
elif infra_config().cloud_provider == "onprem":
292-
filesystem_gateway = S3FilesystemGateway() # Uses MinIO via s3_utils
293-
llm_artifact_gateway = S3LLMArtifactGateway() # Uses MinIO via s3_utils
294284
else:
285+
# AWS uses S3, on-prem uses MinIO (S3-compatible)
295286
filesystem_gateway = S3FilesystemGateway()
296287
llm_artifact_gateway = S3LLMArtifactGateway()
297288
model_endpoints_schema_gateway = LiveModelEndpointsSchemaGateway(
@@ -341,10 +332,8 @@ def _get_external_interfaces(
341332
if infra_config().cloud_provider == "azure":
342333
llm_fine_tune_repository = ABSFileLLMFineTuneRepository(file_path=file_path)
343334
llm_fine_tune_events_repository = ABSFileLLMFineTuneEventsRepository()
344-
elif infra_config().cloud_provider == "onprem":
345-
llm_fine_tune_repository = S3FileLLMFineTuneRepository(file_path=file_path) # Uses MinIO
346-
llm_fine_tune_events_repository = S3FileLLMFineTuneEventsRepository() # Uses MinIO
347335
else:
336+
# AWS uses S3, on-prem uses MinIO (S3-compatible)
348337
llm_fine_tune_repository = S3FileLLMFineTuneRepository(file_path=file_path)
349338
llm_fine_tune_events_repository = S3FileLLMFineTuneEventsRepository()
350339
llm_fine_tuning_service = DockerImageBatchJobLLMFineTuningService(
@@ -360,18 +349,16 @@ def _get_external_interfaces(
360349
file_storage_gateway: FileStorageGateway
361350
if infra_config().cloud_provider == "azure":
362351
file_storage_gateway = ABSFileStorageGateway()
363-
elif infra_config().cloud_provider == "onprem":
364-
file_storage_gateway = S3FileStorageGateway() # Uses MinIO via s3_utils
365352
else:
353+
# AWS uses S3, on-prem uses MinIO (S3-compatible)
366354
file_storage_gateway = S3FileStorageGateway()
367355

368356
docker_repository: DockerRepository
369-
if CIRCLECI:
357+
if CIRCLECI or infra_config().cloud_provider == "onprem":
358+
# On-prem uses fake docker repository (no ECR/ACR validation)
370359
docker_repository = FakeDockerRepository()
371360
elif infra_config().cloud_provider == "azure":
372361
docker_repository = ACRDockerRepository()
373-
elif infra_config().cloud_provider == "onprem":
374-
docker_repository = OnPremDockerRepository()
375362
else:
376363
docker_repository = ECRDockerRepository()
377364

model-engine/model_engine_server/common/config.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,13 @@ class HostedModelInferenceServiceConfig:
7070
user_inference_tensorflow_repository: str
7171
docker_image_layer_cache_repository: str
7272
sensitive_log_mode: bool
73-
# Exactly one of the following three must be specified
73+
# Exactly one of the following must be specified for Redis cache
7474
cache_redis_aws_url: Optional[str] = None # also using this to store sync autoscaling metrics
7575
cache_redis_azure_host: Optional[str] = None
7676
cache_redis_aws_secret_name: Optional[str] = (
7777
None # Not an env var because the redis cache info is already here
7878
)
79+
cache_redis_onprem_url: Optional[str] = None # For on-prem Redis (e.g., redis://redis:6379/0)
7980
sglang_repository: Optional[str] = None
8081

8182
@classmethod
@@ -90,8 +91,13 @@ def from_yaml(cls, yaml_path):
9091

9192
@property
9293
def cache_redis_url(self) -> str:
94+
# On-prem Redis support (explicit URL, no cloud provider dependency)
95+
if self.cache_redis_onprem_url:
96+
return self.cache_redis_onprem_url
97+
9398
cloud_provider = infra_config().cloud_provider
9499

100+
# On-prem: support REDIS_HOST env var fallback
95101
if cloud_provider == "onprem":
96102
if self.cache_redis_aws_url:
97103
logger.info("On-prem deployment using cache_redis_aws_url")

model-engine/model_engine_server/common/io.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,19 @@
33
import os
44
from typing import Any
55

6+
import boto3
67
import smart_open
78
from model_engine_server.core.config import infra_config
89

910

1011
def open_wrapper(uri: str, mode: str = "rt", **kwargs):
1112
client: Any
13+
cloud_provider: str
14+
# This follows the 5.1.0 smart_open API
1215
try:
1316
cloud_provider = infra_config().cloud_provider
1417
except Exception:
1518
cloud_provider = "aws"
16-
1719
if cloud_provider == "azure":
1820
from azure.identity import DefaultAzureCredential
1921
from azure.storage.blob import BlobServiceClient
@@ -23,9 +25,9 @@ def open_wrapper(uri: str, mode: str = "rt", **kwargs):
2325
DefaultAzureCredential(),
2426
)
2527
else:
26-
from model_engine_server.infra.gateways.s3_utils import get_s3_client
27-
28-
client = get_s3_client(kwargs)
28+
profile_name = kwargs.get("aws_profile", os.getenv("AWS_PROFILE"))
29+
session = boto3.Session(profile_name=profile_name)
30+
client = session.client("s3")
2931

3032
transport_params = {"client": client}
3133
return smart_open.open(uri, mode, transport_params=transport_params)

model-engine/model_engine_server/core/aws/roles.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,21 @@ def session(role: Optional[str], session_type: SessionT = Session) -> SessionT:
119119
120120
:param:`session_type` defines the type of session to return. Most users will use
121121
the default boto3 type. Some users required a special type (e.g aioboto3 session).
122+
123+
For on-prem deployments without AWS profiles, pass role=None or role=""
124+
to use default credentials from environment variables (AWS_ACCESS_KEY_ID, etc).
122125
"""
123126
# Do not assume roles in CIRCLECI
124127
if os.getenv("CIRCLECI"):
125128
logger.warning(f"In circleci, not assuming role (ignoring: {role})")
126129
role = None
127-
sesh: SessionT = session_type(profile_name=role)
130+
131+
# Use profile-based auth only if role is specified
132+
# For on-prem with MinIO, role will be None or empty - use env var credentials
133+
if role:
134+
sesh: SessionT = session_type(profile_name=role)
135+
else:
136+
sesh: SessionT = session_type() # Uses default credential chain (env vars)
128137
return sesh
129138

130139

model-engine/model_engine_server/core/aws/storage_client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
import time
23
from typing import IO, Callable, Iterable, Optional, Sequence
34

@@ -20,6 +21,10 @@
2021

2122

2223
def sync_storage_client(**kwargs) -> BaseClient:
24+
# Support for MinIO/on-prem S3-compatible storage
25+
endpoint_url = os.getenv("S3_ENDPOINT_URL")
26+
if endpoint_url and "endpoint_url" not in kwargs:
27+
kwargs["endpoint_url"] = endpoint_url
2328
return session(infra_config().profile_ml_worker).client("s3", **kwargs) # type: ignore
2429

2530

0 commit comments

Comments
 (0)