scaleapi · lilyz-ai · Apr 21, 2026 · May 7, 2026 · May 7, 2026 · May 7, 2026
diff --git a/charts/model-engine/values_sample.yaml b/charts/model-engine/values_sample.yaml
@@ -24,7 +24,7 @@ celery_broker_type_redis: null
 #       - ALL  
 
 # tag [required] is the LLM Engine docker image tag
-tag: e360bfb1d21d9d4e7b7fcb6b29ca752095b4d0f4
+tag: 2e9d00786419ef44ec5c9d3305d8d6451d6aabfb
 # context is a user-specified deployment tag. Can be used to 
 context: production
 image:

diff --git a/model-engine/Makefile b/model-engine/Makefile
@@ -0,0 +1,45 @@
+.PHONY: install dev-up dev-down dev-migrate dev-server test
+
+# Directory make is invoked from. Every recipe below uses paths relative to
+# model-engine/, so run make from (or with -C pointing at) that directory.
+MODEL_ENGINE_DIR := $(CURDIR)
+DB_URL := postgresql://postgres:password@localhost:5432/llm_engine
+COMPOSE := docker compose -f docker-compose.local.yml
+
+# Environment assignments prefixed to the dev-migrate and dev-server commands.
+# Values are double-quoted so a checkout path containing spaces stays one word.
+LOCAL_ENV := \
+	LOCAL=true \
+	GIT_TAG=local \
+	ML_INFRA_DATABASE_URL="$(DB_URL)" \
+	DEPLOY_SERVICE_CONFIG_PATH="$(MODEL_ENGINE_DIR)/service_configs/service_config_local.yaml" \
+	ML_INFRA_SERVICES_CONFIG_PATH="$(MODEL_ENGINE_DIR)/model_engine_server/core/configs/default.yaml"
+
+# Install the requirements files, then the package itself in editable mode.
+install:
+	pip install -r requirements.txt -r requirements-test.txt -r requirements_override.txt
+	pip install -e .
+
+# Start Postgres and Redis in the background. --wait blocks until the
+# healthchecks declared in docker-compose.local.yml pass and returns non-zero
+# if a service becomes unhealthy, so a broken container fails the target
+# instead of leaving make polling forever.
+dev-up:
+	$(COMPOSE) up -d --wait
+	@echo "Postgres and Redis are healthy."
+
+# Stop and remove the containers started by dev-up.
+dev-down:
+	$(COMPOSE) down
+
+# Run the database migration script with the local environment.
+dev-migrate:
+	$(LOCAL_ENV) bash model_engine_server/db/migrations/run_database_migration.sh
+
+# Start the gateway on port 5000 with a single worker in debug mode.
+dev-server:
+	$(LOCAL_ENV) start-fastapi-server --port 5000 --num-workers 1 --debug
+
+# Run the unit test suite.
+test:
+	pytest tests/unit/
diff --git a/model-engine/README.md b/model-engine/README.md
@@ -129,6 +129,88 @@ For OpenAI-compatible V2 APIs, we generate Pydantic models from OpenAI's spec:
 
 ## Local Development
 
+### Control Plane Local Setup
+
+The control plane (Gateway API server, Service Builder, K8s Cache) can be run entirely
+locally without GPU hardware or cloud credentials. Endpoint creation calls succeed
+against a fake k8s/SQS/ECR backend, letting you iterate on control plane code quickly.
+
+**Prerequisites:** Python 3.10+, Docker with the Compose v2 plugin (`docker compose`), and `make`
+
+#### One-time setup
+
+```bash
+cd model-engine/
+
+# Install Python dependencies
+make install
+
+# Start Postgres + Redis
+make dev-up
+
+# Apply database migrations
+make dev-migrate
+```
+
+#### Run the API server
+
+```bash
+make dev-server
+```
+
+The gateway starts at http://localhost:5000 with auto-reload on file changes.
+Authentication is skipped automatically (`SKIP_AUTH=true`, because the local config sets no `identity_service_url`), so any token works.
+
+#### Make API calls
+
+```bash
+# List model endpoints
+curl http://localhost:5000/v1/model-endpoints \
+  -H "Authorization: Bearer test-user"
+
+# Create an LLM endpoint (uses fake k8s — no real infra needed)
+curl -X POST http://localhost:5000/v1/llm/model-endpoints \
+  -H "Authorization: Bearer test-user" \
+  -H "Content-Type: application/json" \
+  -d '{"name":"local-test","model_name":"meta-llama/Meta-Llama-3.1-8B-Instruct","inference_framework":"vllm","min_workers":0,"max_workers":1,"gpus":1,"gpu_type":"nvidia-ampere-a10","endpoint_type":"sync"}'
+```
+
+#### Stop backing services
+
+```bash
+make dev-down
+```
+
+#### What `LOCAL=true` does
+
+Running with `LOCAL=true` (set automatically by `make dev-server` and `make dev-migrate`):
+
+- Skips the `GIT_TAG` env var requirement
+- Uses a **fake queue delegate** and Redis-based task queues (no SQS/Azure Service Bus needed)
+- Uses a **fake Docker repository** (no ECR/ACR/GAR needed)
+- Auth is skipped when `identity_service_url` is absent from config (default)
+- Postgres and Redis are real local services (via docker-compose)
+
+This means you can create/update/delete endpoints via the API and see them reflected
+in Postgres, without any Kubernetes cluster or cloud account.
+
+#### Running individual components manually
+
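+If you prefer to set env vars yourself rather than use `make` (note that `make` additionally sets `ML_INFRA_SERVICES_CONFIG_PATH` to `model_engine_server/core/configs/default.yaml`):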
+
+```bash
+export LOCAL=true
+export GIT_TAG=local
+export ML_INFRA_DATABASE_URL=postgresql://postgres:password@localhost:5432/llm_engine
+export DEPLOY_SERVICE_CONFIG_PATH=$(pwd)/service_configs/service_config_local.yaml
+
+# Gateway
+start-fastapi-server --port 5000 --num-workers 1 --debug
+
+# Database migration (run this before starting the gateway)
+bash model_engine_server/db/migrations/run_database_migration.sh
+```
+
 ### Testing the HTTP Forwarder
 
 Start an endpoint on port 5005:

diff --git a/model-engine/docker-compose.local.yml b/model-engine/docker-compose.local.yml
@@ -0,0 +1,26 @@
+# Backing services for local control-plane development.
+# Ports are published on 127.0.0.1 only: Postgres uses a throwaway password and
+# Redis has no authentication, so neither should be reachable from other hosts.
+services:
+  postgres:
+    image: postgres:15
+    environment:
+      POSTGRES_PASSWORD: password
+      POSTGRES_DB: llm_engine
+    ports:
+      - "127.0.0.1:5432:5432"
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U postgres"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+
+  redis:
+    image: redis:7
+    ports:
+      - "127.0.0.1:6379:6379"
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
diff --git a/model-engine/model_engine_server/api/dependencies.py b/model-engine/model_engine_server/api/dependencies.py
@@ -10,7 +10,7 @@
 from model_engine_server.common.aioredis_pool import build_aioredis_pool
 from model_engine_server.common.config import hmi_config
 from model_engine_server.common.dtos.model_endpoints import BrokerType
-from model_engine_server.common.env_vars import CIRCLECI
+from model_engine_server.common.env_vars import CIRCLECI, LOCAL
 from model_engine_server.core.auth.authentication_repository import AuthenticationRepository, User
 from model_engine_server.core.auth.fake_authentication_repository import (
     FakeAuthenticationRepository,
@@ -241,7 +241,7 @@ def _get_external_interfaces(
     )
 
     queue_delegate: QueueEndpointResourceDelegate
-    if CIRCLECI:
+    if CIRCLECI or LOCAL:
         queue_delegate = FakeQueueEndpointResourceDelegate()
     elif infra_config().cloud_provider == "onprem":
         queue_delegate = OnPremQueueEndpointResourceDelegate()
@@ -257,8 +257,8 @@ def _get_external_interfaces(
 
     inference_task_queue_gateway: TaskQueueGateway
     infra_task_queue_gateway: TaskQueueGateway
-    if CIRCLECI or infra_config().cloud_provider == "onprem":
-        # On-prem uses Redis-based task queues
+    if CIRCLECI or LOCAL or infra_config().cloud_provider == "onprem":
+        # On-prem and local dev use Redis-based task queues
         inference_task_queue_gateway = redis_24h_task_queue_gateway
         infra_task_queue_gateway = redis_task_queue_gateway
     elif infra_config().cloud_provider == "azure":
@@ -391,7 +391,7 @@ def _get_external_interfaces(
     registry_type = infra_config().docker_registry_type or infer_registry_type(
         infra_config().docker_repo_prefix
     )
-    if CIRCLECI:
+    if CIRCLECI or LOCAL:
         docker_repository = FakeDockerRepository()
     elif registry_type == "ecr":
         docker_repository = ECRDockerRepository()

diff --git a/model-engine/model_engine_server/common/env_vars.py b/model-engine/model_engine_server/common/env_vars.py
@@ -76,5 +76,5 @@ def get_boolean_env_var(name: str) -> bool:
     logger.warning("LOCAL development & testing mode is ON")
 
 GIT_TAG: str = os.environ.get("GIT_TAG", "GIT_TAG_NOT_FOUND")
-if GIT_TAG == "GIT_TAG_NOT_FOUND" and "pytest" not in sys.modules:
+if GIT_TAG == "GIT_TAG_NOT_FOUND" and "pytest" not in sys.modules and not LOCAL:
     raise ValueError("GIT_TAG environment variable must be set")
diff --git a/model-engine/service_configs/service_config_local.yaml b/model-engine/service_configs/service_config_local.yaml
@@ -0,0 +1,33 @@
+gateway_namespace: default
+endpoint_namespace: model-engine
+model_primitive_host: "none"
+
+# Local Redis; port 6379 matches the redis service published by docker-compose.local.yml.
+# Use onprem_url so it's checked before cloud_provider assertions
+cache_redis_onprem_url: redis://localhost:6379/15
+
+sqs_profile: nonexistent_sqs_profile  # placeholder; presumably unused because LOCAL=true selects the fake queue delegate (confirm)
+sqs_queue_policy_template: >
+  {
+    "Version": "2012-10-17",
+    "Statement": []
+  }
+sqs_queue_tag_template: >
+  {}
+
+billing_queue_arn: none  # placeholder value, not a real ARN
+cloud_file_llm_fine_tune_repository: "s3://local-bucket/fine_tune_repository/local"  # NOTE(review): looks like a placeholder bucket; confirm nothing reads it locally
+
+dd_trace_enabled: false
+istio_enabled: false
+sensitive_log_mode: false
+tgi_repository: "text-generation-inference"  # image repository names; with LOCAL=true the gateway uses a fake Docker repository
+vllm_repository: "vllm"
+lightllm_repository: "lightllm"
+tensorrt_llm_repository: "tensorrt-llm"
+batch_inference_vllm_repository: "llm-engine/batch-infer-vllm"
+user_inference_base_repository: "launch/inference"
+user_inference_pytorch_repository: "hosted-model-inference/async-pytorch"
+user_inference_tensorflow_repository: "hosted-model-inference/async-tensorflow-cpu"
+docker_image_layer_cache_repository: "kaniko-cache"
+hf_user_fine_tuned_weights_prefix: "s3://local-bucket/model-weights"  # NOTE(review): same placeholder bucket as above; confirm