Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion charts/model-engine/values_sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ celery_broker_type_redis: null
# - ALL

# tag [required] is the LLM Engine docker image tag
tag: e360bfb1d21d9d4e7b7fcb6b29ca752095b4d0f4
tag: 2e9d00786419ef44ec5c9d3305d8d6451d6aabfb
# context is a user-specified deployment tag. Can be used to
context: production
image:
Expand Down
36 changes: 36 additions & 0 deletions model-engine/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
.PHONY: install dev-up dev-down dev-migrate dev-server test

# Absolute path of the directory make is run from (expected: model-engine/).
MODEL_ENGINE_DIR := $(abspath .)
# Connection string for the local Postgres database used in dev mode.
DB_URL := postgresql://postgres:password@localhost:5432/llm_engine

# Environment assignments prefixed to commands that must run in local dev
# mode. Built up one entry per line; the expanded value is a single
# space-separated list of NAME=value pairs.
LOCAL_ENV := LOCAL=true
LOCAL_ENV += GIT_TAG=local
LOCAL_ENV += ML_INFRA_DATABASE_URL=$(DB_URL)
LOCAL_ENV += DEPLOY_SERVICE_CONFIG_PATH=$(MODEL_ENGINE_DIR)/service_configs/service_config_local.yaml
LOCAL_ENV += ML_INFRA_SERVICES_CONFIG_PATH=$(MODEL_ENGINE_DIR)/model_engine_server/core/configs/default.yaml

# Install runtime, test, and override requirements, then the package itself
# in editable mode.
install:
	pip install \
		-r requirements.txt \
		-r requirements-test.txt \
		-r requirements_override.txt
	pip install -e .

# Maximum number of one-second polls to wait for each backing service.
DEV_UP_MAX_TRIES ?= 60

# Start Postgres and Redis in the background, then block until both accept
# connections. Each wait is bounded so that a container which never comes up
# fails the target instead of hanging forever. `exec -T` disables pseudo-TTY
# allocation, which is required when make is run without a terminal (CI,
# pipes) and when the output is piped into grep.
dev-up:
	docker compose -f docker-compose.local.yml up -d
	@echo "Waiting for services to be healthy..."
	@tries=0; \
	until docker compose -f docker-compose.local.yml exec -T postgres pg_isready -U postgres -q; do \
		tries=$$((tries + 1)); \
		if [ "$$tries" -ge "$(DEV_UP_MAX_TRIES)" ]; then \
			echo "Postgres did not become ready after $(DEV_UP_MAX_TRIES) attempts." >&2; \
			exit 1; \
		fi; \
		sleep 1; \
	done
	@echo "Postgres ready."
	@tries=0; \
	until docker compose -f docker-compose.local.yml exec -T redis redis-cli ping | grep -q PONG; do \
		tries=$$((tries + 1)); \
		if [ "$$tries" -ge "$(DEV_UP_MAX_TRIES)" ]; then \
			echo "Redis did not become ready after $(DEV_UP_MAX_TRIES) attempts." >&2; \
			exit 1; \
		fi; \
		sleep 1; \
	done
	@echo "Redis ready."

# Stop the backing services defined in docker-compose.local.yml.
dev-down:
	docker compose -f docker-compose.local.yml down

# Run the database migration script with the local-dev environment applied.
dev-migrate:
	$(LOCAL_ENV) \
		bash model_engine_server/db/migrations/run_database_migration.sh

# Launch the FastAPI gateway on port 5000 with a single worker in debug mode,
# using the local-dev environment.
dev-server:
	$(LOCAL_ENV) \
		start-fastapi-server --port 5000 --num-workers 1 --debug

# Run the unit tests under tests/unit/.
test:
	pytest tests/unit/
82 changes: 82 additions & 0 deletions model-engine/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,88 @@ For OpenAI-compatible V2 APIs, we generate Pydantic models from OpenAI's spec:

## Local Development

### Control Plane Local Setup

The control plane (Gateway API server, Service Builder, K8s Cache) can be run entirely
locally without GPU hardware or cloud credentials. Endpoint creation calls succeed
against a fake k8s/SQS/ECR backend, letting you iterate on control plane code quickly.

**Prerequisites:** Python 3.10+, Docker

#### One-time setup

```bash
cd model-engine/

# Install Python dependencies
make install

# Start Postgres + Redis
make dev-up

# Apply database migrations
make dev-migrate
```

#### Run the API server

```bash
make dev-server
```

The gateway starts at http://localhost:5000 with auto-reload on file changes.
Authentication is skipped automatically because the local service config does not
set `identity_service_url`, so any bearer token works.

#### Make API calls

```bash
# List model endpoints
curl http://localhost:5000/v1/model-endpoints \
-H "Authorization: Bearer test-user"

# Create an LLM endpoint (uses fake k8s — no real infra needed)
curl -X POST http://localhost:5000/v1/llm/model-endpoints \
-H "Authorization: Bearer test-user" \
-H "Content-Type: application/json" \
-d '{"name":"local-test","model_name":"meta-llama/Meta-Llama-3.1-8B-Instruct","inference_framework":"vllm","min_workers":0,"max_workers":1,"gpus":1,"gpu_type":"nvidia-ampere-a10","endpoint_type":"sync"}'
```

#### Stop backing services

```bash
make dev-down
```

#### What `LOCAL=true` does

Running with `LOCAL=true` (set automatically by `make dev-server` and `make dev-migrate`):

- Skips the `GIT_TAG` env var requirement
- Uses a **fake queue delegate** (no SQS/Azure Service Bus needed)
- Uses a **fake Docker repository** (no ECR/ACR/GAR needed)
- Auth is skipped when `identity_service_url` is absent from config (default)
- Postgres and Redis are real local services (via docker-compose)

This means you can create/update/delete endpoints via the API and see them reflected
in Postgres, without any Kubernetes cluster or cloud account.

#### Running individual components manually

If you prefer to set env vars yourself rather than use `make`:

```bash
export LOCAL=true
export GIT_TAG=local
export ML_INFRA_DATABASE_URL=postgresql://postgres:password@localhost:5432/llm_engine
export DEPLOY_SERVICE_CONFIG_PATH=$(pwd)/service_configs/service_config_local.yaml
export ML_INFRA_SERVICES_CONFIG_PATH=$(pwd)/model_engine_server/core/configs/default.yaml

# Gateway
start-fastapi-server --port 5000 --num-workers 1 --debug

# Database migration
bash model_engine_server/db/migrations/run_database_migration.sh
```

### Testing the HTTP Forwarder

Start an endpoint on port 5005:
Expand Down
23 changes: 23 additions & 0 deletions model-engine/docker-compose.local.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Backing services for local control-plane development.
# These use throwaway credentials and must not be exposed beyond this machine.
services:
  postgres:
    image: postgres:15
    environment:
      POSTGRES_PASSWORD: password
      POSTGRES_DB: llm_engine
    ports:
      # Bind to loopback only: the database uses a trivial password, so it
      # should not be reachable from other hosts on the network.
      - "127.0.0.1:5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 5s
      timeout: 5s
      retries: 5

  redis:
    image: redis:7
    ports:
      # Bind to loopback only: this Redis instance has no authentication.
      - "127.0.0.1:6379:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 5s
      retries: 5
10 changes: 5 additions & 5 deletions model-engine/model_engine_server/api/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from model_engine_server.common.aioredis_pool import build_aioredis_pool
from model_engine_server.common.config import hmi_config
from model_engine_server.common.dtos.model_endpoints import BrokerType
from model_engine_server.common.env_vars import CIRCLECI
from model_engine_server.common.env_vars import CIRCLECI, LOCAL
from model_engine_server.core.auth.authentication_repository import AuthenticationRepository, User
from model_engine_server.core.auth.fake_authentication_repository import (
FakeAuthenticationRepository,
Expand Down Expand Up @@ -241,7 +241,7 @@ def _get_external_interfaces(
)

queue_delegate: QueueEndpointResourceDelegate
if CIRCLECI:
if CIRCLECI or LOCAL:
queue_delegate = FakeQueueEndpointResourceDelegate()
elif infra_config().cloud_provider == "onprem":
queue_delegate = OnPremQueueEndpointResourceDelegate()
Expand All @@ -257,8 +257,8 @@ def _get_external_interfaces(

inference_task_queue_gateway: TaskQueueGateway
infra_task_queue_gateway: TaskQueueGateway
if CIRCLECI or infra_config().cloud_provider == "onprem":
# On-prem uses Redis-based task queues
if CIRCLECI or LOCAL or infra_config().cloud_provider == "onprem":
# On-prem and local dev use Redis-based task queues
inference_task_queue_gateway = redis_24h_task_queue_gateway
infra_task_queue_gateway = redis_task_queue_gateway
elif infra_config().cloud_provider == "azure":
Expand Down Expand Up @@ -391,7 +391,7 @@ def _get_external_interfaces(
registry_type = infra_config().docker_registry_type or infer_registry_type(
infra_config().docker_repo_prefix
)
if CIRCLECI:
if CIRCLECI or LOCAL:
docker_repository = FakeDockerRepository()
elif registry_type == "ecr":
docker_repository = ECRDockerRepository()
Expand Down
2 changes: 1 addition & 1 deletion model-engine/model_engine_server/common/env_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,5 @@ def get_boolean_env_var(name: str) -> bool:
logger.warning("LOCAL development & testing mode is ON")

GIT_TAG: str = os.environ.get("GIT_TAG", "GIT_TAG_NOT_FOUND")
if GIT_TAG == "GIT_TAG_NOT_FOUND" and "pytest" not in sys.modules:
if GIT_TAG == "GIT_TAG_NOT_FOUND" and "pytest" not in sys.modules and not LOCAL:
raise ValueError("GIT_TAG environment variable must be set")
33 changes: 33 additions & 0 deletions model-engine/service_configs/service_config_local.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Service configuration for running the control plane locally.

# Namespaces
gateway_namespace: "default"
endpoint_namespace: "model-engine"
model_primitive_host: "none"

# Local Redis (started via docker-compose.local.yml).
# Use onprem_url so it's checked before cloud_provider assertions.
cache_redis_onprem_url: "redis://localhost:6379/15"

# SQS placeholders
sqs_profile: "nonexistent_sqs_profile"
sqs_queue_policy_template: >
  {
    "Version": "2012-10-17",
    "Statement": []
  }
sqs_queue_tag_template: >
  {}

# Billing and fine-tune storage placeholders
billing_queue_arn: "none"
cloud_file_llm_fine_tune_repository: "s3://local-bucket/fine_tune_repository/local"

# Feature toggles
dd_trace_enabled: false
istio_enabled: false
sensitive_log_mode: false

# Image repository names
tgi_repository: "text-generation-inference"
vllm_repository: "vllm"
lightllm_repository: "lightllm"
tensorrt_llm_repository: "tensorrt-llm"
batch_inference_vllm_repository: "llm-engine/batch-infer-vllm"
user_inference_base_repository: "launch/inference"
user_inference_pytorch_repository: "hosted-model-inference/async-pytorch"
user_inference_tensorflow_repository: "hosted-model-inference/async-tensorflow-cpu"
docker_image_layer_cache_repository: "kaniko-cache"

# Model weights location
hf_user_fine_tuned_weights_prefix: "s3://local-bucket/model-weights"