From 0af3d94d0fabc9700f2e0ea867d0a935c1cb7dc4 Mon Sep 17 00:00:00 2001 From: mathumathi Date: Tue, 10 Mar 2026 15:34:24 +0530 Subject: [PATCH 1/8] [CHORE] Consolidate env config into single common.env with zero duplication - Create docker/sample.common.env as single source of truth for all shared env vars (DB, Redis, RabbitMQ, MinIO, Flipt, service URLs, timeouts) - Remove docker/sample.essentials.env (merged into common.env) - Eliminate credential duplication by using Docker Compose environment mappings to derive infra vars from app vars (e.g. POSTGRES_USER from DB_USER, RABBITMQ_DEFAULT_USER from CELERY_BROKER_USER) - Standardize DB env var names across services: PG_BE_* -> DB_*, DB_USERNAME -> DB_USER, REDIS_USERNAME -> REDIS_USER - Strip shared vars from per-service sample.env files (backend, workers, prompt-service, runner, platform-service, x2text-service) - Update docker-compose files to use .env (auto-read by Docker Compose for variable substitution) - Update docker/README.md with simplified setup instructions Co-Authored-By: Claude Opus 4.6 --- backend/sample.env | 78 ++---------- docker/README.md | 16 ++- docker/docker-compose-dev-essentials.yaml | 26 +++- docker/docker-compose.yaml | 41 +++--- docker/sample.common.env | 118 ++++++++++++++++++ docker/sample.essentials.env | 19 --- platform-service/sample.env | 29 ++--- .../src/unstract/platform_service/config.py | 10 +- .../src/unstract/platform_service/env.py | 12 +- .../unstract/platform_service/extensions.py | 2 +- prompt-service/sample.env | 61 +-------- .../src/unstract/prompt_service/extensions.py | 10 +- runner/sample.env | 31 +---- workers/sample.env | 77 ++---------- x2text-service/app/env.py | 2 +- x2text-service/app/models.py | 2 +- x2text-service/sample.env | 14 +-- 17 files changed, 239 insertions(+), 309 deletions(-) create mode 100644 docker/sample.common.env delete mode 100644 docker/sample.essentials.env diff --git a/backend/sample.env b/backend/sample.env index 909f02e1f9..9b32295587 
100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -1,3 +1,10 @@ +# ----------------------------------------------------------------------------- +# LOCAL DEVELOPMENT NOTE: +# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) +# live in docker/sample.common.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see common.env header). +# ----------------------------------------------------------------------------- + DJANGO_SETTINGS_MODULE='backend.settings.dev' # NOTE: Change below to True if you are running in HTTPS mode. @@ -14,22 +21,9 @@ PATH_PREFIX="api/v1" DJANGO_APP_BACKEND_URL=http://frontend.unstract.localhost DJANGO_SECRET_KEY="1(xf&nc6!y7!l&!5xe&i_rx7e^m@fcut9fduv86ft=-b@2g6" -# Postgres DB envs -DB_HOST='unstract-db' -DB_USER='unstract_dev' -DB_PASSWORD='unstract_pass' -DB_NAME='unstract_db' -DB_PORT=5432 -DB_SCHEMA="unstract" - # Celery Backend Database (optional - defaults to DB_NAME if unset) # Example: # CELERY_BACKEND_DB_NAME=unstract_celery_db -# Redis -REDIS_HOST="unstract-redis" -REDIS_PORT=6379 -REDIS_PASSWORD="" -REDIS_USER=default # Redis Retry Configuration # Controls automatic retry behavior for transient Redis connection failures @@ -74,20 +68,6 @@ GOOGLE_STORAGE_ACCESS_KEY_ID= GOOGLE_STORAGE_SECRET_ACCESS_KEY= GOOGLE_STORAGE_BASE_URL=https://storage.googleapis.com -# Platform Service -PLATFORM_SERVICE_HOST=http://unstract-platform-service -PLATFORM_SERVICE_PORT=3001 - -# Tool Runner -UNSTRACT_RUNNER_HOST=http://unstract-runner -UNSTRACT_RUNNER_PORT=5002 -UNSTRACT_RUNNER_API_TIMEOUT=240 # (in seconds) 2 mins -UNSTRACT_RUNNER_API_RETRY_COUNT=5 # Number of retries for failed requests -UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 # Exponential backoff factor for retries - -# Prompt Service -PROMPT_HOST=http://unstract-prompt-service -PROMPT_PORT=3003 #Prompt Studio PROMPT_STUDIO_FILE_PATH=/app/prompt-studio-data @@ -98,15 +78,6 @@ 
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.97" STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure" STRUCTURE_TOOL_IMAGE_TAG="0.0.97" -# Feature Flags -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9000 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python - - -#X2Text Service -X2TEXT_HOST=http://unstract-x2text-service -X2TEXT_PORT=3004 # Encryption Key # Key must be 32 url-safe base64-encoded bytes. Check the README.md for details @@ -142,14 +113,6 @@ SYSTEM_ADMIN_EMAIL="admin@abc.com" # Set Django Session Expiry Time (in seconds) SESSION_COOKIE_AGE=86400 -# Control async extraction of LLMWhisperer -# Time in seconds to wait before polling LLMWhisperer's status API -ADAPTER_LLMW_POLL_INTERVAL=30 -# Total number of times to poll the status API. -# 500 mins to allow 1500 (max pages limit) * 20 (approx time in sec to process a page) -ADAPTER_LLMW_MAX_POLLS=1000 -# Number of times to retry the /whisper-status API before failing the extraction -ADAPTER_LLMW_STATUS_RETRIES=5 # Enable logging of workflow history. ENABLE_LOG_HISTORY=True @@ -157,14 +120,6 @@ ENABLE_LOG_HISTORY=True LOG_HISTORY_CONSUMER_INTERVAL=30 # Maximum number of logs to insert in a single batch. LOGS_BATCH_LIMIT=30 -# Logs Expiry of 24 hours -LOGS_EXPIRATION_TIME_IN_SECOND=86400 - -# Celery Configuration -# Used by celery and to connect to queue to push logs -CELERY_BROKER_BASE_URL="amqp://unstract-rabbitmq:5672//" -CELERY_BROKER_USER=admin -CELERY_BROKER_PASS=password # Indexing flag to prevent re-index INDEXING_FLAG_TTL=1800 @@ -176,25 +131,6 @@ NOTIFICATION_TIMEOUT=5 # with a YAML and JSONs TOOL_REGISTRY_CONFIG_PATH="/data/tool_registry_config" -# Flipt Service -FLIPT_SERVICE_AVAILABLE=False - -# File System Configuration for Workflow and API Execution - -# Directory Prefixes for storing execution files -WORKFLOW_EXECUTION_DIR_PREFIX="unstract/execution" -API_EXECUTION_DIR_PREFIX="unstract/api" - -# Storage Provider for Workflow Execution -# Valid options: MINIO, S3, etc.. 
-WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' - -# Storage Provider for API Execution -API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' - -#Remote storage related envs -PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data" # Storage Provider for Tool registry TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}' diff --git a/docker/README.md b/docker/README.md index 4990ffe314..a575028729 100644 --- a/docker/README.md +++ b/docker/README.md @@ -15,7 +15,21 @@ VERSION=dev docker compose -f docker-compose.build.yaml --profile optional build ## Docker Run -**NOTE**: First copy `sample.*.env` files to `*.env` and update as required. +**NOTE**: Before running, set up your env files: + +```bash +# 1. Common env vars shared across all services and infrastructure +# (DB, Redis, RabbitMQ, Flipt, MinIO, service URLs, timeouts) +cp docker/sample.common.env docker/.env + +# 2. 
Per-service env files (service-specific settings only) +cp backend/sample.env backend/.env +cp platform-service/sample.env platform-service/.env +cp prompt-service/sample.env prompt-service/.env +cp x2text-service/sample.env x2text-service/.env +cp runner/sample.env runner/.env +cp workers/sample.env workers/.env +``` ```bash # Up all services diff --git a/docker/docker-compose-dev-essentials.yaml b/docker/docker-compose-dev-essentials.yaml index d0ec7b4ba2..9638ae652a 100644 --- a/docker/docker-compose-dev-essentials.yaml +++ b/docker/docker-compose-dev-essentials.yaml @@ -11,7 +11,13 @@ services: - postgres_data:/var/lib/postgresql/data/ - ./scripts/db-setup/db_setup.sh:/docker-entrypoint-initdb.d/db_setup.sh env_file: - - ./essentials.env + - ./.env + environment: + # Map app DB_* vars to Postgres container's expected POSTGRES_* vars + POSTGRES_USER: ${DB_USER} + POSTGRES_PASSWORD: ${DB_PASSWORD} + POSTGRES_DB: ${DB_NAME} + POSTGRES_SCHEMA: ${DB_SCHEMA} labels: - traefik.enable=false @@ -39,7 +45,7 @@ services: volumes: - minio_data:/data env_file: - - ./essentials.env + - ./.env command: server /data --console-address ":9001" labels: - traefik.enable=true @@ -100,6 +106,14 @@ services: - "9005:9000" # gRPC port volumes: - flipt_data:/var/opt/flipt + # https://www.flipt.io/docs/configuration/overview#environment-variables + # https://www.flipt.io/docs/configuration/overview#configuration-parameters + env_file: + - ./.env + environment: + FLIPT_CACHE_ENABLED: true + # Flipt DB connection derived from app DB_* vars + FLIPT_DB_URL: "postgres://${DB_USER}:${DB_PASSWORD}@db:5432/${DB_NAME}?sslmode=disable" labels: - traefik.enable=true - traefik.http.routers.feature-flag.rule=Host(`feature-flag.unstract.localhost`) @@ -128,7 +142,7 @@ services: labels: - traefik.enable=false env_file: - - ./essentials.env + - ./.env rabbitmq: image: rabbitmq:4.1.0-management @@ -136,7 +150,11 @@ services: hostname: unstract-rabbit restart: unless-stopped env_file: - - 
./essentials.env + - ./.env + environment: + # Map app CELERY_BROKER_* vars to RabbitMQ container's expected vars + RABBITMQ_DEFAULT_USER: ${CELERY_BROKER_USER} + RABBITMQ_DEFAULT_PASS: ${CELERY_BROKER_PASS} ports: - "5672:5672" # AMQP port - "15672:15672" # Management UI port diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 6f1996818a..92b0eb2134 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -12,6 +12,7 @@ services: ports: - "8000:8000" env_file: + - ./.env - ../backend/.env depends_on: - db @@ -26,7 +27,7 @@ services: volumes: - prompt_studio_data:/app/prompt-studio-data - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config environment: - ENVIRONMENT=development - APPLICATION_NAME=unstract-backend @@ -46,6 +47,7 @@ services: entrypoint: .venv/bin/celery command: "-A backend worker --loglevel=info -Q dashboard_metric_events --autoscale=${WORKER_METRICS_AUTOSCALE:-4,1}" env_file: + - ./.env - ../backend/.env depends_on: - rabbitmq @@ -65,6 +67,7 @@ services: entrypoint: .venv/bin/celery command: "-A backend flower --port=5555 --purge_offline_workers=5" env_file: + - ./.env - ../backend/.env depends_on: - rabbitmq @@ -88,8 +91,8 @@ services: entrypoint: .venv/bin/celery command: "-A backend beat --scheduler django_celery_beat.schedulers:DatabaseScheduler -l INFO" env_file: + - ./.env - ../backend/.env - - ./essentials.env depends_on: - db - rabbitmq @@ -121,6 +124,7 @@ services: ports: - "3001:3001" env_file: + - ./.env - ../platform-service/.env depends_on: - redis @@ -140,6 +144,7 @@ services: ports: - "3003:3003" env_file: + - ./.env - ../prompt-service/.env labels: - traefik.enable=false @@ -154,6 +159,7 @@ services: ports: - "3004:3004" env_file: + - ./.env - ../x2text-service/.env depends_on: - db @@ -167,6 +173,7 @@ services: ports: - 5002:5002 env_file: + - 
./.env - ../runner/.env volumes: - ./workflow_data:/data @@ -190,8 +197,8 @@ services: ports: - "8085:8090" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -212,7 +219,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-callback-v2: image: unstract/worker-unified:${VERSION} @@ -222,8 +229,8 @@ services: ports: - "8086:8083" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -238,7 +245,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-file-processing-v2: image: unstract/worker-unified:${VERSION} @@ -263,8 +270,8 @@ services: ports: - "8087:8082" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -290,7 +297,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-general-v2: image: unstract/worker-unified:${VERSION} @@ -300,8 +307,8 @@ services: ports: - "8088:8082" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -317,7 +324,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-notification-v2: image: unstract/worker-unified:${VERSION} @@ -327,8 +334,8 @@ services: ports: - "8089:8085" env_file: + - ./.env - ../workers/.env - 
- ./essentials.env depends_on: - db - redis @@ -365,7 +372,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-log-consumer-v2: image: unstract/worker-unified:${VERSION} @@ -375,8 +382,8 @@ services: ports: - "8090:8084" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -414,7 +421,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-log-history-scheduler-v2: image: unstract/worker-unified:${VERSION} @@ -423,8 +430,8 @@ services: entrypoint: ["/bin/bash"] command: ["/app/log_consumer/scheduler.sh"] env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -447,8 +454,8 @@ services: ports: - "8091:8087" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -481,7 +488,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config volumes: prompt_studio_data: diff --git a/docker/sample.common.env b/docker/sample.common.env new file mode 100644 index 0000000000..576095014c --- /dev/null +++ b/docker/sample.common.env @@ -0,0 +1,118 @@ +# ============================================================================= +# Common environment variables shared across all Unstract services and infra. +# Copy this file to .env and update the values for your environment. 
+# +# This single file is used by both: +# - Infrastructure containers (Postgres, MinIO, RabbitMQ, Flipt, Qdrant) +# - Application services (backend, workers, platform-service, etc.) +# +# For local development outside Docker, replace container hostnames: +# unstract-db -> localhost +# unstract-redis -> localhost +# unstract-rabbitmq -> localhost (CELERY_BROKER_BASE_URL=amqp://localhost:5672//) +# unstract-flipt -> localhost (EVALUATION_SERVER_PORT=9005 for host-mapped port) +# unstract-minio -> localhost (in storage credential JSON blocks) +# unstract-platform-service -> localhost +# unstract-prompt-service -> localhost +# unstract-x2text-service -> localhost +# unstract-runner -> localhost +# ============================================================================= + +# ============================================================================= +# PostgreSQL +# These DB_* vars are used by app services AND automatically mapped to +# POSTGRES_* for the Postgres container via docker-compose environment blocks. +# You only need to set them once here. +# ============================================================================= +DB_HOST=unstract-db +DB_PORT=5432 +DB_USER=unstract_dev +DB_PASSWORD=unstract_pass +DB_NAME=unstract_db +DB_SCHEMA=unstract + +# ============================================================================= +# Redis +# ============================================================================= +REDIS_HOST=unstract-redis +REDIS_PORT=6379 +REDIS_USER=default +REDIS_PASSWORD= + +# ============================================================================= +# RabbitMQ / Celery Broker +# These CELERY_BROKER_* vars are used by app services AND automatically mapped +# to RABBITMQ_* for the RabbitMQ container via docker-compose environment blocks. +# You only need to set them once here. 
+# ============================================================================= +CELERY_BROKER_BASE_URL=amqp://unstract-rabbitmq:5672// +CELERY_BROKER_USER=admin +CELERY_BROKER_PASS=password + +# ============================================================================= +# MinIO (Object Storage) +# MINIO_ROOT_USER/PASSWORD are used by the MinIO container on init. +# Update the JSON blocks below if you change these credentials. +# ============================================================================= +MINIO_ROOT_USER=minio +MINIO_ROOT_PASSWORD=minio123 + +WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +TEMPORARY_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data" + +# File execution directory prefixes +WORKFLOW_EXECUTION_DIR_PREFIX="unstract/execution" +API_EXECUTION_DIR_PREFIX="unstract/api" + +# ============================================================================= +# Flipt (Feature Flags) +# ============================================================================= +FLIPT_SERVICE_AVAILABLE=False +EVALUATION_SERVER_IP=unstract-flipt +EVALUATION_SERVER_PORT=9000 +PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + +# ============================================================================= +# Qdrant (Vector DB) +# ============================================================================= +QDRANT_USER=unstract_vector_dev +QDRANT_PASS=unstract_vector_pass 
+QDRANT_DB=unstract_vector_db + +# ============================================================================= +# Inter-Service Communication +# ============================================================================= + +# Platform Service +PLATFORM_SERVICE_HOST=http://unstract-platform-service +PLATFORM_SERVICE_PORT=3001 + +# Prompt Service +PROMPT_HOST=http://unstract-prompt-service +PROMPT_PORT=3003 + +# X2Text Service +X2TEXT_HOST=http://unstract-x2text-service +X2TEXT_PORT=3004 + +# Tool Runner +UNSTRACT_RUNNER_HOST=http://unstract-runner +UNSTRACT_RUNNER_PORT=5002 +UNSTRACT_RUNNER_API_TIMEOUT=240 +UNSTRACT_RUNNER_API_RETRY_COUNT=5 +UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 + +# ============================================================================= +# Shared Timeouts & Settings +# ============================================================================= + +# Logs expiry (24 hours) +LOGS_EXPIRATION_TIME_IN_SECOND=86400 + +# LLMWhisperer async extraction settings +ADAPTER_LLMW_POLL_INTERVAL=30 +ADAPTER_LLMW_MAX_POLLS=1000 +ADAPTER_LLMW_STATUS_RETRIES=5 diff --git a/docker/sample.essentials.env b/docker/sample.essentials.env deleted file mode 100644 index 51876fb8f9..0000000000 --- a/docker/sample.essentials.env +++ /dev/null @@ -1,19 +0,0 @@ -# Refer https://hub.docker.com/_/postgres#:~:text=How%20to%20extend%20this%20image -POSTGRES_USER=unstract_dev -POSTGRES_PASSWORD=unstract_pass -POSTGRES_DB=unstract_db -# Used by db setup script -POSTGRES_SCHEMA=unstract - -MINIO_ROOT_USER=minio -MINIO_ROOT_PASSWORD=minio123 -MINIO_ACCESS_KEY=minio -MINIO_SECRET_KEY=minio123 - -QDRANT_USER=unstract_vector_dev -QDRANT_PASS=unstract_vector_pass -QDRANT_DB=unstract_vector_db - -# RabbitMQ related envs -RABBITMQ_DEFAULT_USER=admin -RABBITMQ_DEFAULT_PASS=password diff --git a/platform-service/sample.env b/platform-service/sample.env index 54eec703c3..21638620b8 100644 --- a/platform-service/sample.env +++ b/platform-service/sample.env @@ -1,33 +1,18 @@ +# 
----------------------------------------------------------------------------- +# LOCAL DEVELOPMENT NOTE: +# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) +# live in docker/sample.common.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see common.env header). +# ----------------------------------------------------------------------------- + # Flask FLASK_APP=src/unstract/platform_service/run.py FLASK_RUN_PORT=3001 -# Redis -REDIS_HOST=unstract-redis -REDIS_PORT=6379 -REDIS_USERNAME=default -REDIS_PASSWORD= - -# Backend DB -PG_BE_HOST=unstract-db -PG_BE_PORT=5432 -PG_BE_USERNAME=unstract_dev -PG_BE_PASSWORD=unstract_pass -PG_BE_DATABASE=unstract_db -DB_SCHEMA="unstract" - - # Encryption Key # key must be 32 url-safe base64-encoded bytes. ENCRYPTION_KEY="Sample-Key" -# Feature Flags -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9000 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python -# Flipt Service -FLIPT_SERVICE_AVAILABLE=False - # Cost calculation related ENVs MODEL_PRICES_URL="https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json" MODEL_PRICES_TTL_IN_DAYS=7 diff --git a/platform-service/src/unstract/platform_service/config.py b/platform-service/src/unstract/platform_service/config.py index 08fff41307..40df343a47 100644 --- a/platform-service/src/unstract/platform_service/config.py +++ b/platform-service/src/unstract/platform_service/config.py @@ -38,11 +38,11 @@ def create_app() -> Flask: # Initialize and connect to the database db.init( - database=Env.PG_BE_DATABASE, - user=Env.PG_BE_USERNAME, - password=Env.PG_BE_PASSWORD, - host=Env.PG_BE_HOST, - port=Env.PG_BE_PORT, + database=Env.DB_NAME, + user=Env.DB_USER, + password=Env.DB_PASSWORD, + host=Env.DB_HOST, + port=Env.DB_PORT, options=f"-c application_name={Env.APPLICATION_NAME}", ) diff --git a/platform-service/src/unstract/platform_service/env.py 
b/platform-service/src/unstract/platform_service/env.py index 2bcf2da382..aa7664651c 100644 --- a/platform-service/src/unstract/platform_service/env.py +++ b/platform-service/src/unstract/platform_service/env.py @@ -10,13 +10,13 @@ class Env: BAD_REQUEST = "Bad Request" REDIS_HOST = EnvManager.get_required_setting("REDIS_HOST") REDIS_PORT = int(EnvManager.get_required_setting("REDIS_PORT", 6379)) - REDIS_USERNAME = os.environ.get("REDIS_USERNAME") + REDIS_USER = os.environ.get("REDIS_USER") REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") - PG_BE_HOST = os.environ.get("PG_BE_HOST") - PG_BE_PORT = int(os.environ.get("PG_BE_PORT", 5432)) - PG_BE_USERNAME = os.environ.get("PG_BE_USERNAME") - PG_BE_PASSWORD = os.environ.get("PG_BE_PASSWORD") - PG_BE_DATABASE = os.environ.get("PG_BE_DATABASE") + DB_HOST = os.environ.get("DB_HOST") + DB_PORT = int(os.environ.get("DB_PORT", 5432)) + DB_USER = os.environ.get("DB_USER") + DB_PASSWORD = os.environ.get("DB_PASSWORD") + DB_NAME = os.environ.get("DB_NAME") ENCRYPTION_KEY = EnvManager.get_required_setting("ENCRYPTION_KEY") MODEL_PRICES_URL = EnvManager.get_required_setting("MODEL_PRICES_URL") MODEL_PRICES_TTL_IN_DAYS = int( diff --git a/platform-service/src/unstract/platform_service/extensions.py b/platform-service/src/unstract/platform_service/extensions.py index d847405791..0798e6b15f 100644 --- a/platform-service/src/unstract/platform_service/extensions.py +++ b/platform-service/src/unstract/platform_service/extensions.py @@ -25,7 +25,7 @@ def get_redis_pool() -> redis.ConnectionPool: _redis_pool = redis.ConnectionPool( host=Env.REDIS_HOST, port=Env.REDIS_PORT, - username=Env.REDIS_USERNAME, + username=Env.REDIS_USER, password=Env.REDIS_PASSWORD, max_connections=10, decode_responses=False, diff --git a/prompt-service/sample.env b/prompt-service/sample.env index e26e6cbcd2..add6546a68 100644 --- a/prompt-service/sample.env +++ b/prompt-service/sample.env @@ -1,65 +1,16 @@ -# Backend DB -PG_BE_HOST=unstract-db -PG_BE_PORT=5432 
-PG_BE_USERNAME=unstract_dev -PG_BE_PASSWORD=unstract_pass -PG_BE_DATABASE=unstract_db -DB_SCHEMA="unstract" - -# Redis -REDIS_HOST="unstract-redis" -REDIS_PORT=6379 -REDIS_PASSWORD="" -REDIS_USER=default +# ----------------------------------------------------------------------------- +# LOCAL DEVELOPMENT NOTE: +# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) +# live in docker/sample.common.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see common.env header). +# ----------------------------------------------------------------------------- # Logging LOG_LEVEL=INFO - -### Env from `unstract-core` ### -# Celery for PublishLogs -CELERY_BROKER_BASE_URL="amqp://unstract-rabbitmq:5672//" -CELERY_BROKER_USER=admin -CELERY_BROKER_PASS=password -# Logs Expiry of 24 hours -LOGS_EXPIRATION_TIME_IN_SECOND=86400 - - -### Env from `unstract-flags` ### -# Feature Flags -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9000 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python -# Flipt Service -FLIPT_SERVICE_AVAILABLE=False - - -### Env from `unstract-sdk` ### -# Platform Service -PLATFORM_SERVICE_HOST=http://unstract-platform-service -PLATFORM_SERVICE_PORT=3001 - -# X2Text Service -X2TEXT_HOST=http://unstract-x2text-service -X2TEXT_PORT=3004 - -# Remote storage related envs -PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -TEMPORARY_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data/" - # Timeout for LLMW (v2) extraction ADAPTER_LLMW_WAIT_TIMEOUT=900 # 15 mins -# Control async extraction of LLMWhisperer (v1) -# Time in seconds to wait before polling LLMWhisperer's status API -ADAPTER_LLMW_POLL_INTERVAL=30 -# Total number of times to poll the 
status API. -# 500 mins to allow 1500 (max pages limit) * 20 (approx time in sec to process a page) -ADAPTER_LLMW_MAX_POLLS=1000 -# Number of times to retry the /whisper-status API before failing the extraction -ADAPTER_LLMW_STATUS_RETRIES=5 - ### Env for Rentroll Service ### # Rentroll Service RENTROLL_SERVICE_HOST=http://unstract-rentroll-service diff --git a/prompt-service/src/unstract/prompt_service/extensions.py b/prompt-service/src/unstract/prompt_service/extensions.py index 1626591dc0..add3149b10 100644 --- a/prompt-service/src/unstract/prompt_service/extensions.py +++ b/prompt-service/src/unstract/prompt_service/extensions.py @@ -8,11 +8,11 @@ from unstract.prompt_service.utils.env_loader import get_env_or_die # Load required environment variables -db_host = get_env_or_die("PG_BE_HOST") -db_port = get_env_or_die("PG_BE_PORT") -db_user = get_env_or_die("PG_BE_USERNAME") -db_pass = get_env_or_die("PG_BE_PASSWORD") -db_name = get_env_or_die("PG_BE_DATABASE") +db_host = get_env_or_die("DB_HOST") +db_port = get_env_or_die("DB_PORT") +db_user = get_env_or_die("DB_USER") +db_pass = get_env_or_die("DB_PASSWORD") +db_name = get_env_or_die("DB_NAME") application_name = env.get("APPLICATION_NAME", "unstract-prompt-service") # Initialize and connect to the database diff --git a/runner/sample.env b/runner/sample.env index 723bc89612..1b31d6f278 100644 --- a/runner/sample.env +++ b/runner/sample.env @@ -1,7 +1,9 @@ -# To pass to tool-sidecar for Kombu's connection -CELERY_BROKER_BASE_URL="amqp://unstract-rabbitmq:5672//" -CELERY_BROKER_USER=admin -CELERY_BROKER_PASS=password +# ----------------------------------------------------------------------------- +# LOCAL DEVELOPMENT NOTE: +# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) +# live in docker/sample.common.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see common.env header). 
+# ----------------------------------------------------------------------------- TOOL_CONTAINER_NETWORK="unstract-network" TOOL_CONTAINER_LABELS="[]" @@ -20,27 +22,6 @@ REMOVE_CONTAINER_ON_EXIT=True # Client module path of the container engine to be used. CONTAINER_CLIENT_PATH=unstract.runner.clients.docker_client -# Logs Expiry of 24 hours -LOGS_EXPIRATION_TIME_IN_SECOND=86400 - -# Feature Flags -FLIPT_SERVICE_AVAILABLE=False -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9005 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python - -# File System Configuration for Workflow and API Execution -# Directory Prefixes for storing execution files -WORKFLOW_EXECUTION_DIR_PREFIX="unstract/execution" -# Storage Provider for Workflow Execution -# Valid options: MINIO, S3, etc.. -WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' - -# For unified notification -REDIS_HOST=unstract-redis -REDIS_PORT=6379 -REDIS_USER=default -REDIS_PASSWORD= # Flask related envs # Can be 'production' or 'development' diff --git a/workers/sample.env b/workers/sample.env index 516fc242e1..8811f3481f 100644 --- a/workers/sample.env +++ b/workers/sample.env @@ -2,6 +2,11 @@ # Unstract Workers Environment Configuration # ============================================================================= # Copy this file to .env and update the values for your environment +# +# LOCAL DEVELOPMENT NOTE: +# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) +# live in docker/sample.common.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see common.env header). 
# ============================================================================= # Core Configuration (REQUIRED) @@ -43,33 +48,10 @@ INTERNAL_API_ORGANIZATION_PREFIX=v1/organization/ # Celery Configuration # ============================================================================= -# Celery Broker (RabbitMQ) - REQUIRED -# These credentials must match your RabbitMQ configuration -CELERY_BROKER_BASE_URL=amqp://unstract-rabbitmq:5672// -CELERY_BROKER_USER=admin -CELERY_BROKER_PASS=password - -# ============================================================================= -# Database Configuration (REQUIRED) -# ============================================================================= - -# PostgreSQL (for Celery result backend) - REQUIRED -# These credentials must match your PostgreSQL configuration -DB_HOST=unstract-db -DB_USER=unstract_dev -DB_PASSWORD=unstract_pass -DB_NAME=unstract_db -DB_PORT=5432 -DB_SCHEMA=unstract - # Celery Backend Database Schema CELERY_BACKEND_DB_SCHEMA=public -# Redis (for caching and queues) - REQUIRED -REDIS_HOST=unstract-redis -REDIS_PORT=6379 -REDIS_PASSWORD= -REDIS_USER=default +# Redis DB index REDIS_DB=0 # Cache-Specific Redis Configuration @@ -82,9 +64,6 @@ CACHE_REDIS_USERNAME= CACHE_REDIS_SSL=false CACHE_REDIS_SSL_CERT_REQS=required -# Database URL (for fallback usage) -DATABASE_URL=postgresql://unstract_dev:unstract_pass@unstract-db:5432/unstract_db - # ============================================================================= # Worker Infrastructure Settings # ============================================================================= @@ -208,7 +187,6 @@ WORKER_INSTANCE_ID=dev-01 ENABLE_LOG_HISTORY=true LOG_HISTORY_CONSUMER_INTERVAL=30 LOGS_BATCH_LIMIT=30 -LOGS_EXPIRATION_TIME_IN_SECOND=86400 LOG_HISTORY_QUEUE_NAME=log_history_queue # Log Queue Size Protection @@ -226,36 +204,11 @@ NOTIFICATION_QUEUE_NAME=notifications # Backend Services # ============================================================================= 
-# Platform Service -PLATFORM_SERVICE_HOST=http://unstract-platform-service -PLATFORM_SERVICE_PORT=3001 - -# Prompt Service -PROMPT_HOST=http://unstract-prompt-service -PROMPT_PORT=3003 - -# X2Text Service -X2TEXT_HOST=http://unstract-x2text-service -X2TEXT_PORT=3004 - -# Tool Runner -UNSTRACT_RUNNER_HOST=http://unstract-runner -UNSTRACT_RUNNER_PORT=5002 -UNSTRACT_RUNNER_API_TIMEOUT=300 -UNSTRACT_RUNNER_API_RETRY_COUNT=5 -UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 # ============================================================================= # File Storage Configuration # ============================================================================= -# File Storage Credentials (MinIO) -WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' - -# File Execution Configuration -WORKFLOW_EXECUTION_DIR_PREFIX=unstract/execution -API_EXECUTION_DIR_PREFIX=unstract/api MAX_PARALLEL_FILE_BATCHES=1 # File Execution TTL Configuration @@ -363,22 +316,8 @@ GOOGLE_OAUTH2_SECRET= # ============================================================================= # Local Development Overrides # ============================================================================= -# For local development (all services on host), change Docker service names to localhost: +# For local development (all services on host), override vars from common.env: # DJANGO_APP_BACKEND_URL=http://localhost:8000 # INTERNAL_API_BASE_URL=http://localhost:8000/internal -# CELERY_BROKER_BASE_URL=amqp://localhost:5672// -# DB_HOST=localhost -# REDIS_HOST=localhost # CACHE_REDIS_HOST=localhost -# PLATFORM_SERVICE_HOST=http://localhost -# PROMPT_HOST=http://localhost -# X2TEXT_HOST=http://localhost -# UNSTRACT_RUNNER_HOST=http://localhost -# 
WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS={"provider": "minio", "credentials": {"endpoint_url": "http://localhost:9000", "key": "minio", "secret": "minio123"}} -# API_FILE_STORAGE_CREDENTIALS={"provider": "minio", "credentials": {"endpoint_url": "http://localhost:9000", "key": "minio", "secret": "minio123"}} - -# Flipt Service -FLIPT_SERVICE_AVAILABLE=False -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9005 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + diff --git a/x2text-service/app/env.py b/x2text-service/app/env.py index d2c213fbb7..fb197cec1d 100644 --- a/x2text-service/app/env.py +++ b/x2text-service/app/env.py @@ -48,7 +48,7 @@ class Env: DB_SCHEMA = EnvManager.get_required_setting("DB_SCHEMA") DB_HOST = EnvManager.get_required_setting("DB_HOST") DB_PORT = int(EnvManager.get_required_setting("DB_PORT", 5432)) - DB_USERNAME = EnvManager.get_required_setting("DB_USERNAME") + DB_USER = EnvManager.get_required_setting("DB_USER") DB_PASSWORD = EnvManager.get_required_setting("DB_PASSWORD") DB_NAME = EnvManager.get_required_setting("DB_NAME") diff --git a/x2text-service/app/models.py b/x2text-service/app/models.py index b3fb102544..5528e7f7f8 100644 --- a/x2text-service/app/models.py +++ b/x2text-service/app/models.py @@ -7,7 +7,7 @@ be_db = peewee.PostgresqlDatabase( Env.DB_NAME, - user=Env.DB_USERNAME, + user=Env.DB_USER, password=Env.DB_PASSWORD, host=Env.DB_HOST, port=Env.DB_PORT, diff --git a/x2text-service/sample.env b/x2text-service/sample.env index df25e0b09b..f92cbfd677 100644 --- a/x2text-service/sample.env +++ b/x2text-service/sample.env @@ -1,13 +1,13 @@ +# ----------------------------------------------------------------------------- +# LOCAL DEVELOPMENT NOTE: +# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) +# live in docker/sample.common.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see common.env header). 
+# ----------------------------------------------------------------------------- + FLASK_ENV=production FLASK_RUN_HOST=0.0.0.0 FLASK_RUN_PORT=3004 API_URL_PREFIX=/api/v1 -# Postgres -DB_HOST=unstract-db -DB_PORT=5432 -DB_USERNAME=unstract_dev -DB_PASSWORD=unstract_pass -DB_NAME=unstract_db -DB_SCHEMA="unstract" From 4b8051a969c9a9638119be4b98f60435f51b19e9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:21:57 +0000 Subject: [PATCH 2/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- workers/sample.env | 1 - x2text-service/sample.env | 2 -- 2 files changed, 3 deletions(-) diff --git a/workers/sample.env b/workers/sample.env index 8811f3481f..db8bfc50ca 100644 --- a/workers/sample.env +++ b/workers/sample.env @@ -320,4 +320,3 @@ GOOGLE_OAUTH2_SECRET= # DJANGO_APP_BACKEND_URL=http://localhost:8000 # INTERNAL_API_BASE_URL=http://localhost:8000/internal # CACHE_REDIS_HOST=localhost - diff --git a/x2text-service/sample.env b/x2text-service/sample.env index f92cbfd677..4f178b1499 100644 --- a/x2text-service/sample.env +++ b/x2text-service/sample.env @@ -9,5 +9,3 @@ FLASK_ENV=production FLASK_RUN_HOST=0.0.0.0 FLASK_RUN_PORT=3004 API_URL_PREFIX=/api/v1 - - From 60e86e34efc303f43dc91ad4464b525ed0ad0fd6 Mon Sep 17 00:00:00 2001 From: mathumathi Date: Tue, 10 Mar 2026 16:10:44 +0530 Subject: [PATCH 3/8] [CHORE] Address CodeRabbit review comments - Make backend/sample.env and workers/sample.env headers explicit about dependency on docker/sample.common.env - Fix docker/README.md copy commands to be docker/-relative - Make DB_* vars required in platform-service env.py (use EnvManager.get_required_setting instead of os.environ.get) - Remove unused os import from platform-service env.py - Update worker_config.py validation messages to reference docker/sample.common.env for shared vars - Remove unused REMOTE_MODEL_PRICES_FILE_PATH from 
platform-service sample.env - Improve MinIO credential sync warning in sample.common.env - Add clarifying comments for REDIS_* vs CACHE_REDIS_* in workers sample.env Co-Authored-By: Claude Opus 4.6 --- backend/sample.env | 13 +++++++++---- docker/README.md | 16 +++++++++------- docker/sample.common.env | 3 ++- platform-service/sample.env | 1 - .../src/unstract/platform_service/env.py | 12 +++++------- workers/sample.env | 19 ++++++++++++------- .../infrastructure/config/worker_config.py | 7 ++++--- 7 files changed, 41 insertions(+), 30 deletions(-) diff --git a/backend/sample.env b/backend/sample.env index 9b32295587..9ef166d102 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -1,8 +1,13 @@ # ----------------------------------------------------------------------------- -# LOCAL DEVELOPMENT NOTE: -# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) -# live in docker/sample.common.env. When running outside Docker, copy those vars -# here and replace container hostnames with localhost (see common.env header). +# WARNING: This file is NOT runnable by itself. +# It must be combined with docker/sample.common.env (copied as docker/.env), +# which provides required shared vars: DB_*, REDIS_*, CELERY_BROKER_*, +# PLATFORM_SERVICE_HOST/PORT, PROMPT_HOST/PORT, X2TEXT_HOST/PORT, +# UNSTRACT_RUNNER_HOST/PORT, MinIO storage credentials, and timeouts. +# Missing these will cause startup failures or misrouted traffic. +# +# For local dev outside Docker, copy shared vars here and replace +# container hostnames with localhost (see docker/sample.common.env header). 
# ----------------------------------------------------------------------------- DJANGO_SETTINGS_MODULE='backend.settings.dev' diff --git a/docker/README.md b/docker/README.md index a575028729..71155fa138 100644 --- a/docker/README.md +++ b/docker/README.md @@ -18,17 +18,19 @@ VERSION=dev docker compose -f docker-compose.build.yaml --profile optional build **NOTE**: Before running, set up your env files: ```bash +# Run from the docker/ directory: + # 1. Common env vars shared across all services and infrastructure # (DB, Redis, RabbitMQ, Flipt, MinIO, service URLs, timeouts) -cp docker/sample.common.env docker/.env +cp sample.common.env .env # 2. Per-service env files (service-specific settings only) -cp backend/sample.env backend/.env -cp platform-service/sample.env platform-service/.env -cp prompt-service/sample.env prompt-service/.env -cp x2text-service/sample.env x2text-service/.env -cp runner/sample.env runner/.env -cp workers/sample.env workers/.env +cp ../backend/sample.env ../backend/.env +cp ../platform-service/sample.env ../platform-service/.env +cp ../prompt-service/sample.env ../prompt-service/.env +cp ../x2text-service/sample.env ../x2text-service/.env +cp ../runner/sample.env ../runner/.env +cp ../workers/sample.env ../workers/.env ``` ```bash diff --git a/docker/sample.common.env b/docker/sample.common.env index 576095014c..eb160e5bba 100644 --- a/docker/sample.common.env +++ b/docker/sample.common.env @@ -52,7 +52,8 @@ CELERY_BROKER_PASS=password # ============================================================================= # MinIO (Object Storage) # MINIO_ROOT_USER/PASSWORD are used by the MinIO container on init. -# Update the JSON blocks below if you change these credentials. +# IMPORTANT: If you change these, update ALL FOUR JSON blocks below too. +# The "key" and "secret" values must match MINIO_ROOT_USER and MINIO_ROOT_PASSWORD. 
# ============================================================================= MINIO_ROOT_USER=minio MINIO_ROOT_PASSWORD=minio123 diff --git a/platform-service/sample.env b/platform-service/sample.env index 21638620b8..081bc44188 100644 --- a/platform-service/sample.env +++ b/platform-service/sample.env @@ -20,6 +20,5 @@ MODEL_PRICES_FILE_PATH="/cost/model_prices.json" #Remote storage config FILE_STORAGE_CREDENTIALS='{"provider":"local"}' -REMOTE_MODEL_PRICES_FILE_PATH="unstract/cost/model_prices.json" LOG_LEVEL=INFO diff --git a/platform-service/src/unstract/platform_service/env.py b/platform-service/src/unstract/platform_service/env.py index aa7664651c..acd83ad847 100644 --- a/platform-service/src/unstract/platform_service/env.py +++ b/platform-service/src/unstract/platform_service/env.py @@ -1,5 +1,3 @@ -import os - from unstract.platform_service.constants import LogLevel from unstract.platform_service.utils import EnvManager @@ -12,11 +10,11 @@ class Env: REDIS_PORT = int(EnvManager.get_required_setting("REDIS_PORT", 6379)) REDIS_USER = os.environ.get("REDIS_USER") REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") - DB_HOST = os.environ.get("DB_HOST") - DB_PORT = int(os.environ.get("DB_PORT", 5432)) - DB_USER = os.environ.get("DB_USER") - DB_PASSWORD = os.environ.get("DB_PASSWORD") - DB_NAME = os.environ.get("DB_NAME") + DB_HOST = EnvManager.get_required_setting("DB_HOST") + DB_PORT = int(EnvManager.get_required_setting("DB_PORT", 5432)) + DB_USER = EnvManager.get_required_setting("DB_USER") + DB_PASSWORD = EnvManager.get_required_setting("DB_PASSWORD") + DB_NAME = EnvManager.get_required_setting("DB_NAME") ENCRYPTION_KEY = EnvManager.get_required_setting("ENCRYPTION_KEY") MODEL_PRICES_URL = EnvManager.get_required_setting("MODEL_PRICES_URL") MODEL_PRICES_TTL_IN_DAYS = int( diff --git a/workers/sample.env b/workers/sample.env index db8bfc50ca..6ebfc9c92f 100644 --- a/workers/sample.env +++ b/workers/sample.env @@ -1,12 +1,15 @@ # 
============================================================================= # Unstract Workers Environment Configuration # ============================================================================= -# Copy this file to .env and update the values for your environment +# Copy this file to .env and update the values for your environment. # -# LOCAL DEVELOPMENT NOTE: -# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) -# live in docker/sample.common.env. When running outside Docker, copy those vars -# here and replace container hostnames with localhost (see common.env header). +# WARNING: This file is NOT runnable by itself. +# It must be combined with docker/sample.common.env (copied as docker/.env), +# which provides required shared vars: DB_*, REDIS_*, CELERY_BROKER_*, +# and other infrastructure settings. Missing these will cause startup failures. +# +# For local dev outside Docker, copy shared vars here and replace +# container hostnames with localhost (see docker/sample.common.env header). # ============================================================================= # Core Configuration (REQUIRED) @@ -51,10 +54,12 @@ INTERNAL_API_ORGANIZATION_PREFIX=v1/organization/ # Celery Backend Database Schema CELERY_BACKEND_DB_SCHEMA=public -# Redis DB index +# Redis DB index (general Redis settings come from docker/sample.common.env) REDIS_DB=0 -# Cache-Specific Redis Configuration +# Cache-Specific Redis Configuration (worker-specific overrides) +# These override the base REDIS_* vars from common.env for worker cache operations. +# For Docker: use unstract-redis. For local dev: use localhost. 
CACHE_REDIS_ENABLED=true CACHE_REDIS_HOST=unstract-redis CACHE_REDIS_PORT=6379 diff --git a/workers/shared/infrastructure/config/worker_config.py b/workers/shared/infrastructure/config/worker_config.py index eb1c0b8e79..328a55597a 100644 --- a/workers/shared/infrastructure/config/worker_config.py +++ b/workers/shared/infrastructure/config/worker_config.py @@ -372,7 +372,8 @@ def __post_init__(self): f"Worker configuration validation failed (worker will continue with defaults): {e}" ) logging.info( - "To fix this, ensure all required environment variables are set. See workers/sample.env" + "To fix this, ensure all required environment variables are set. " + "See docker/sample.common.env for shared vars and workers/sample.env for worker-specific vars" ) def _build_cache_redis_url(self): @@ -427,7 +428,7 @@ def validate(self): "CELERY_BROKER_URL could not be built. Please set the following environment variables: " "CELERY_BROKER_BASE_URL (e.g., 'amqp://unstract-rabbitmq:5672//'), " "CELERY_BROKER_USER, and CELERY_BROKER_PASS. " - "See workers/sample.env for examples." + "See docker/sample.common.env for these shared vars." ) if not self.celery_result_backend: @@ -435,7 +436,7 @@ def validate(self): "CELERY_RESULT_BACKEND could not be built. Please set the following environment variables: " "DB_HOST, DB_USER, DB_PASSWORD, DB_NAME, and DB_PORT. " "These are required for Celery to store task results. " - "See workers/sample.env for examples." + "See docker/sample.common.env for these shared vars." ) # Cache Redis validation From f1fd9b55b43632445f8d064aa247c1af5229208c Mon Sep 17 00:00:00 2001 From: mathumathi Date: Tue, 10 Mar 2026 16:24:01 +0530 Subject: [PATCH 4/8] [FIX] Update run-platform.sh and db-setup README for common.env - Replace sample.essentials.env reference with sample.common.env in run-platform.sh (fixes CI build failure) - Keep sample.env merge for compose-level vars (worker settings, etc.) 
- Update db-setup README to reference common.env and explain env mappings Co-Authored-By: Claude Opus 4.6 --- docker/scripts/db-setup/README.md | 8 ++++---- run-platform.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docker/scripts/db-setup/README.md b/docker/scripts/db-setup/README.md index e24b9c875b..525de67c49 100644 --- a/docker/scripts/db-setup/README.md +++ b/docker/scripts/db-setup/README.md @@ -1,9 +1,9 @@ # Unstract DB Setup Script -[The db_setup.sh](/docker/scripts/db-setup/db_setup.sh) script helps setup the postgres database by making use of environment variables defined in the `.essentials.env` (user copy of the [sample.essentials.env](/docker/sample.essentials.env)) +[The db_setup.sh](/docker/scripts/db-setup/db_setup.sh) script helps setup the postgres database by making use of environment variables derived from the `.env` (user copy of [sample.common.env](/docker/sample.common.env)). The Postgres container receives these via docker-compose environment mappings: -- POSTGRES_USER -- POSTGRES_DB -- POSTGRES_SCHEMA +- POSTGRES_USER (mapped from DB_USER) +- POSTGRES_DB (mapped from DB_NAME) +- POSTGRES_SCHEMA (mapped from DB_SCHEMA) This script helps setup the DB user and creates a new schema as well. 
diff --git a/run-platform.sh b/run-platform.sh index bcacfa82e8..72dde66ed6 100755 --- a/run-platform.sh +++ b/run-platform.sh @@ -238,7 +238,7 @@ setup_env() { fi done - copy_or_merge_envs "$script_dir/docker/sample.essentials.env" "$script_dir/docker/essentials.env" "essential services" + copy_or_merge_envs "$script_dir/docker/sample.common.env" "$script_dir/docker/.env" "common services" copy_or_merge_envs "$script_dir/docker/sample.env" "$script_dir/docker/.env" "docker compose" From 6dd73c1b93a01265dae8317bc5f938157aae4015 Mon Sep 17 00:00:00 2001 From: mathumathi Date: Tue, 10 Mar 2026 17:04:24 +0530 Subject: [PATCH 5/8] [FIX] Restore os import for REDIS_USER/REDIS_PASSWORD in platform-service env.py These optional vars use os.environ.get() (not EnvManager) since they can be empty/unset. The import was accidentally removed in the previous commit. Co-Authored-By: Claude Opus 4.6 --- platform-service/src/unstract/platform_service/env.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/platform-service/src/unstract/platform_service/env.py b/platform-service/src/unstract/platform_service/env.py index acd83ad847..eea57e99ce 100644 --- a/platform-service/src/unstract/platform_service/env.py +++ b/platform-service/src/unstract/platform_service/env.py @@ -1,3 +1,5 @@ +import os + from unstract.platform_service.constants import LogLevel from unstract.platform_service.utils import EnvManager From 3ad217ffc1ea2c1273bb333e31a58110e91896d6 Mon Sep 17 00:00:00 2001 From: mathumathi Date: Tue, 10 Mar 2026 17:13:29 +0530 Subject: [PATCH 6/8] Address remaining CodeRabbit review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add missing POSTGRES_PASSWORD mapping to db-setup README - Fix grammar: "helps setup" → "helps set up" - Fix copy_or_merge_envs to always merge when dest exists, ensuring sample.env gets merged into .env on first setup Co-Authored-By: Claude Opus 4.6 --- docker/scripts/db-setup/README.md | 5 +++-- 
run-platform.sh | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/scripts/db-setup/README.md b/docker/scripts/db-setup/README.md index 525de67c49..6e7665452a 100644 --- a/docker/scripts/db-setup/README.md +++ b/docker/scripts/db-setup/README.md @@ -1,9 +1,10 @@ # Unstract DB Setup Script -[The db_setup.sh](/docker/scripts/db-setup/db_setup.sh) script helps setup the postgres database by making use of environment variables derived from the `.env` (user copy of [sample.common.env](/docker/sample.common.env)). The Postgres container receives these via docker-compose environment mappings: +[The db_setup.sh](/docker/scripts/db-setup/db_setup.sh) script helps set up the postgres database by making use of environment variables derived from the `.env` (user copy of [sample.common.env](/docker/sample.common.env)). The Postgres container receives these via docker-compose environment mappings: - POSTGRES_USER (mapped from DB_USER) +- POSTGRES_PASSWORD (mapped from DB_PASSWORD) - POSTGRES_DB (mapped from DB_NAME) - POSTGRES_SCHEMA (mapped from DB_SCHEMA) -This script helps setup the DB user and creates a new schema as well. +This script helps set up the DB user and creates a new schema as well. diff --git a/run-platform.sh b/run-platform.sh index 72dde66ed6..f8cc375566 100755 --- a/run-platform.sh +++ b/run-platform.sh @@ -175,7 +175,7 @@ copy_or_merge_envs() { if [ ! -e "$dest_file" ]; then cp "$src_file" "$dest_file" echo -e "Created env for ""$blue_text""$displayed_reason""$default_text"" at ""$blue_text""$dest_file""$default_text""." - elif [ "$opt_only_env" = true ] || [ "$opt_update" = true ]; then + else python3 "$script_dir/docker/scripts/merge_env.py" "$src_file" "$dest_file" if [ $? 
-ne 0 ]; then exit 1 From 7e0352c4cafa3eefa0b2192f9063202237c44d66 Mon Sep 17 00:00:00 2001 From: mathumathi Date: Wed, 11 Mar 2026 07:46:02 +0530 Subject: [PATCH 7/8] [CHORE] Merge sample.common.env into sample.env, add rolling-deploy fallbacks Address PR review comments: - Merge docker/sample.common.env + docker/sample.env into single docker/sample.env for naming consistency with other folders - Remove unused Qdrant env vars (not read by any Python code) - Remove unused ADAPTER_LLMW_STATUS_RETRIES - Move LLMWhisperer vars to backend and workers sample.env (only services that use them via workflow-execution) - Add backward-compatible fallbacks for renamed env vars (PG_BE_* -> DB_*, DB_USERNAME -> DB_USER, REDIS_USERNAME -> REDIS_USER) to support rolling deployments - Simplify run-platform.sh to single copy_or_merge_envs call - Update all references across READMEs, sample.env headers, and worker_config.py validation messages Co-Authored-By: Claude Opus 4.6 --- backend/sample.env | 8 +- docker/README.md | 4 +- docker/sample.common.env | 119 ------------------ docker/sample.env | 118 +++++++++++++++++ docker/scripts/db-setup/README.md | 2 +- platform-service/sample.env | 4 +- .../src/unstract/platform_service/env.py | 26 +++- prompt-service/sample.env | 4 +- .../src/unstract/prompt_service/extensions.py | 11 +- run-platform.sh | 3 +- runner/sample.env | 4 +- workers/sample.env | 14 ++- .../infrastructure/config/worker_config.py | 6 +- x2text-service/app/env.py | 5 +- x2text-service/sample.env | 4 +- 15 files changed, 178 insertions(+), 154 deletions(-) delete mode 100644 docker/sample.common.env diff --git a/backend/sample.env b/backend/sample.env index 9ef166d102..7b92796369 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -1,13 +1,13 @@ # ----------------------------------------------------------------------------- # WARNING: This file is NOT runnable by itself. 
-# It must be combined with docker/sample.common.env (copied as docker/.env), +# It must be combined with docker/sample.env (copied as docker/.env), # which provides required shared vars: DB_*, REDIS_*, CELERY_BROKER_*, # PLATFORM_SERVICE_HOST/PORT, PROMPT_HOST/PORT, X2TEXT_HOST/PORT, # UNSTRACT_RUNNER_HOST/PORT, MinIO storage credentials, and timeouts. # Missing these will cause startup failures or misrouted traffic. # # For local dev outside Docker, copy shared vars here and replace -# container hostnames with localhost (see docker/sample.common.env header). +# container hostnames with localhost (see docker/sample.env header). # ----------------------------------------------------------------------------- DJANGO_SETTINGS_MODULE='backend.settings.dev' @@ -190,3 +190,7 @@ HITL_FILES_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endp # File active cache redis db FILE_ACTIVE_CACHE_REDIS_DB=0 + +# LLMWhisperer async extraction settings +ADAPTER_LLMW_POLL_INTERVAL=30 +ADAPTER_LLMW_MAX_POLLS=1000 diff --git a/docker/README.md b/docker/README.md index 71155fa138..b935c7f900 100644 --- a/docker/README.md +++ b/docker/README.md @@ -21,8 +21,8 @@ VERSION=dev docker compose -f docker-compose.build.yaml --profile optional build # Run from the docker/ directory: # 1. Common env vars shared across all services and infrastructure -# (DB, Redis, RabbitMQ, Flipt, MinIO, service URLs, timeouts) -cp sample.common.env .env +# (DB, Redis, RabbitMQ, Flipt, MinIO, service URLs, worker config) +cp sample.env .env # 2. Per-service env files (service-specific settings only) cp ../backend/sample.env ../backend/.env diff --git a/docker/sample.common.env b/docker/sample.common.env deleted file mode 100644 index eb160e5bba..0000000000 --- a/docker/sample.common.env +++ /dev/null @@ -1,119 +0,0 @@ -# ============================================================================= -# Common environment variables shared across all Unstract services and infra. 
-# Copy this file to .env and update the values for your environment. -# -# This single file is used by both: -# - Infrastructure containers (Postgres, MinIO, RabbitMQ, Flipt, Qdrant) -# - Application services (backend, workers, platform-service, etc.) -# -# For local development outside Docker, replace container hostnames: -# unstract-db -> localhost -# unstract-redis -> localhost -# unstract-rabbitmq -> localhost (CELERY_BROKER_BASE_URL=amqp://localhost:5672//) -# unstract-flipt -> localhost (EVALUATION_SERVER_PORT=9005 for host-mapped port) -# unstract-minio -> localhost (in storage credential JSON blocks) -# unstract-platform-service -> localhost -# unstract-prompt-service -> localhost -# unstract-x2text-service -> localhost -# unstract-runner -> localhost -# ============================================================================= - -# ============================================================================= -# PostgreSQL -# These DB_* vars are used by app services AND automatically mapped to -# POSTGRES_* for the Postgres container via docker-compose environment blocks. -# You only need to set them once here. -# ============================================================================= -DB_HOST=unstract-db -DB_PORT=5432 -DB_USER=unstract_dev -DB_PASSWORD=unstract_pass -DB_NAME=unstract_db -DB_SCHEMA=unstract - -# ============================================================================= -# Redis -# ============================================================================= -REDIS_HOST=unstract-redis -REDIS_PORT=6379 -REDIS_USER=default -REDIS_PASSWORD= - -# ============================================================================= -# RabbitMQ / Celery Broker -# These CELERY_BROKER_* vars are used by app services AND automatically mapped -# to RABBITMQ_* for the RabbitMQ container via docker-compose environment blocks. -# You only need to set them once here. 
-# ============================================================================= -CELERY_BROKER_BASE_URL=amqp://unstract-rabbitmq:5672// -CELERY_BROKER_USER=admin -CELERY_BROKER_PASS=password - -# ============================================================================= -# MinIO (Object Storage) -# MINIO_ROOT_USER/PASSWORD are used by the MinIO container on init. -# IMPORTANT: If you change these, update ALL FOUR JSON blocks below too. -# The "key" and "secret" values must match MINIO_ROOT_USER and MINIO_ROOT_PASSWORD. -# ============================================================================= -MINIO_ROOT_USER=minio -MINIO_ROOT_PASSWORD=minio123 - -WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -TEMPORARY_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data" - -# File execution directory prefixes -WORKFLOW_EXECUTION_DIR_PREFIX="unstract/execution" -API_EXECUTION_DIR_PREFIX="unstract/api" - -# ============================================================================= -# Flipt (Feature Flags) -# ============================================================================= -FLIPT_SERVICE_AVAILABLE=False -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9000 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python - -# ============================================================================= -# Qdrant (Vector DB) -# 
============================================================================= -QDRANT_USER=unstract_vector_dev -QDRANT_PASS=unstract_vector_pass -QDRANT_DB=unstract_vector_db - -# ============================================================================= -# Inter-Service Communication -# ============================================================================= - -# Platform Service -PLATFORM_SERVICE_HOST=http://unstract-platform-service -PLATFORM_SERVICE_PORT=3001 - -# Prompt Service -PROMPT_HOST=http://unstract-prompt-service -PROMPT_PORT=3003 - -# X2Text Service -X2TEXT_HOST=http://unstract-x2text-service -X2TEXT_PORT=3004 - -# Tool Runner -UNSTRACT_RUNNER_HOST=http://unstract-runner -UNSTRACT_RUNNER_PORT=5002 -UNSTRACT_RUNNER_API_TIMEOUT=240 -UNSTRACT_RUNNER_API_RETRY_COUNT=5 -UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 - -# ============================================================================= -# Shared Timeouts & Settings -# ============================================================================= - -# Logs expiry (24 hours) -LOGS_EXPIRATION_TIME_IN_SECOND=86400 - -# LLMWhisperer async extraction settings -ADAPTER_LLMW_POLL_INTERVAL=30 -ADAPTER_LLMW_MAX_POLLS=1000 -ADAPTER_LLMW_STATUS_RETRIES=5 diff --git a/docker/sample.env b/docker/sample.env index 2feb36cf2b..da37bc01d7 100644 --- a/docker/sample.env +++ b/docker/sample.env @@ -1,3 +1,121 @@ +# ============================================================================= +# Unstract Docker Environment Configuration +# Copy this file to .env and update the values for your environment. +# +# This file is used by: +# - Infrastructure containers (Postgres, MinIO, RabbitMQ, Flipt) +# - Application services (backend, workers, platform-service, etc.) 
+# - Docker Compose variable substitution (worker scaling, celery config) +# +# For local development outside Docker, replace container hostnames: +# unstract-db -> localhost +# unstract-redis -> localhost +# unstract-rabbitmq -> localhost (CELERY_BROKER_BASE_URL=amqp://localhost:5672//) +# unstract-flipt -> localhost (EVALUATION_SERVER_PORT=9005 for host-mapped port) +# unstract-minio -> localhost (in storage credential JSON blocks) +# unstract-platform-service -> localhost +# unstract-prompt-service -> localhost +# unstract-x2text-service -> localhost +# unstract-runner -> localhost +# ============================================================================= + +# ============================================================================= +# PostgreSQL +# These DB_* vars are used by app services AND automatically mapped to +# POSTGRES_* for the Postgres container via docker-compose environment blocks. +# You only need to set them once here. +# +# DEPRECATED ALIASES (kept for rolling-deploy compatibility, remove after full rollout): +# PG_BE_HOST, PG_BE_PORT, PG_BE_USERNAME, PG_BE_PASSWORD, PG_BE_DATABASE +# DB_USERNAME (x2text-service legacy) +# New code falls back to these if DB_* is not set. +# ============================================================================= +DB_HOST=unstract-db +DB_PORT=5432 +DB_USER=unstract_dev +DB_PASSWORD=unstract_pass +DB_NAME=unstract_db +DB_SCHEMA=unstract + +# ============================================================================= +# Redis +# ============================================================================= +REDIS_HOST=unstract-redis +REDIS_PORT=6379 +REDIS_USER=default +REDIS_PASSWORD= + +# ============================================================================= +# RabbitMQ / Celery Broker +# These CELERY_BROKER_* vars are used by app services AND automatically mapped +# to RABBITMQ_* for the RabbitMQ container via docker-compose environment blocks. +# You only need to set them once here. 
+# ============================================================================= +CELERY_BROKER_BASE_URL=amqp://unstract-rabbitmq:5672// +CELERY_BROKER_USER=admin +CELERY_BROKER_PASS=password + +# ============================================================================= +# MinIO (Object Storage) +# MINIO_ROOT_USER/PASSWORD are used by the MinIO container on init. +# IMPORTANT: If you change these, update ALL FOUR JSON blocks below too. +# The "key" and "secret" values must match MINIO_ROOT_USER and MINIO_ROOT_PASSWORD. +# ============================================================================= +MINIO_ROOT_USER=minio +MINIO_ROOT_PASSWORD=minio123 + +WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +TEMPORARY_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data" + +# File execution directory prefixes +WORKFLOW_EXECUTION_DIR_PREFIX="unstract/execution" +API_EXECUTION_DIR_PREFIX="unstract/api" + +# ============================================================================= +# Flipt (Feature Flags) +# ============================================================================= +FLIPT_SERVICE_AVAILABLE=False +EVALUATION_SERVER_IP=unstract-flipt +EVALUATION_SERVER_PORT=9000 +PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + +# ============================================================================= +# Inter-Service Communication +# 
============================================================================= + +# Platform Service +PLATFORM_SERVICE_HOST=http://unstract-platform-service +PLATFORM_SERVICE_PORT=3001 + +# Prompt Service +PROMPT_HOST=http://unstract-prompt-service +PROMPT_PORT=3003 + +# X2Text Service +X2TEXT_HOST=http://unstract-x2text-service +X2TEXT_PORT=3004 + +# Tool Runner +UNSTRACT_RUNNER_HOST=http://unstract-runner +UNSTRACT_RUNNER_PORT=5002 +UNSTRACT_RUNNER_API_TIMEOUT=240 +UNSTRACT_RUNNER_API_RETRY_COUNT=5 +UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 + +# ============================================================================= +# Shared Timeouts & Settings +# ============================================================================= + +# Logs expiry (24 hours) +LOGS_EXPIRATION_TIME_IN_SECOND=86400 + +# ============================================================================= +# Docker Compose / Worker Configuration +# ============================================================================= + # Path where public and private tools are registered # with a YAML and JSONs TOOL_REGISTRY_CONFIG_SRC_PATH="${PWD}/../unstract/tool-registry/tool_registry_config" diff --git a/docker/scripts/db-setup/README.md b/docker/scripts/db-setup/README.md index 6e7665452a..6415e9949e 100644 --- a/docker/scripts/db-setup/README.md +++ b/docker/scripts/db-setup/README.md @@ -1,6 +1,6 @@ # Unstract DB Setup Script -[The db_setup.sh](/docker/scripts/db-setup/db_setup.sh) script helps set up the postgres database by making use of environment variables derived from the `.env` (user copy of [sample.common.env](/docker/sample.common.env)). The Postgres container receives these via docker-compose environment mappings: +[The db_setup.sh](/docker/scripts/db-setup/db_setup.sh) script helps set up the postgres database by making use of environment variables derived from the `.env` (user copy of [sample.env](/docker/sample.env)). 
The Postgres container receives these via docker-compose environment mappings: - POSTGRES_USER (mapped from DB_USER) - POSTGRES_PASSWORD (mapped from DB_PASSWORD) diff --git a/platform-service/sample.env b/platform-service/sample.env index 081bc44188..473b157643 100644 --- a/platform-service/sample.env +++ b/platform-service/sample.env @@ -1,8 +1,8 @@ # ----------------------------------------------------------------------------- # LOCAL DEVELOPMENT NOTE: # Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) -# live in docker/sample.common.env. When running outside Docker, copy those vars -# here and replace container hostnames with localhost (see common.env header). +# live in docker/sample.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see docker/sample.env header). # ----------------------------------------------------------------------------- # Flask diff --git a/platform-service/src/unstract/platform_service/env.py b/platform-service/src/unstract/platform_service/env.py index eea57e99ce..bf2641cf2d 100644 --- a/platform-service/src/unstract/platform_service/env.py +++ b/platform-service/src/unstract/platform_service/env.py @@ -10,13 +10,27 @@ class Env: BAD_REQUEST = "Bad Request" REDIS_HOST = EnvManager.get_required_setting("REDIS_HOST") REDIS_PORT = int(EnvManager.get_required_setting("REDIS_PORT", 6379)) - REDIS_USER = os.environ.get("REDIS_USER") + # REDIS_USER/PASSWORD are optional (local Redis often has no auth) + REDIS_USER = os.environ.get("REDIS_USER") or os.environ.get("REDIS_USERNAME") REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") - DB_HOST = EnvManager.get_required_setting("DB_HOST") - DB_PORT = int(EnvManager.get_required_setting("DB_PORT", 5432)) - DB_USER = EnvManager.get_required_setting("DB_USER") - DB_PASSWORD = EnvManager.get_required_setting("DB_PASSWORD") - DB_NAME = EnvManager.get_required_setting("DB_NAME") + # DB vars: new names with fallback to 
legacy PG_BE_* names for rolling deploys + DB_HOST = EnvManager.get_required_setting( + "DB_HOST", os.environ.get("PG_BE_HOST") + ) + DB_PORT = int( + EnvManager.get_required_setting( + "DB_PORT", os.environ.get("PG_BE_PORT", "5432") + ) + ) + DB_USER = EnvManager.get_required_setting( + "DB_USER", os.environ.get("PG_BE_USERNAME") + ) + DB_PASSWORD = EnvManager.get_required_setting( + "DB_PASSWORD", os.environ.get("PG_BE_PASSWORD") + ) + DB_NAME = EnvManager.get_required_setting( + "DB_NAME", os.environ.get("PG_BE_DATABASE") + ) ENCRYPTION_KEY = EnvManager.get_required_setting("ENCRYPTION_KEY") MODEL_PRICES_URL = EnvManager.get_required_setting("MODEL_PRICES_URL") MODEL_PRICES_TTL_IN_DAYS = int( diff --git a/prompt-service/sample.env b/prompt-service/sample.env index add6546a68..9f804c127f 100644 --- a/prompt-service/sample.env +++ b/prompt-service/sample.env @@ -1,8 +1,8 @@ # ----------------------------------------------------------------------------- # LOCAL DEVELOPMENT NOTE: # Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) -# live in docker/sample.common.env. When running outside Docker, copy those vars -# here and replace container hostnames with localhost (see common.env header). +# live in docker/sample.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see docker/sample.env header). 
# ----------------------------------------------------------------------------- # Logging diff --git a/prompt-service/src/unstract/prompt_service/extensions.py b/prompt-service/src/unstract/prompt_service/extensions.py index add3149b10..28019ac890 100644 --- a/prompt-service/src/unstract/prompt_service/extensions.py +++ b/prompt-service/src/unstract/prompt_service/extensions.py @@ -8,11 +8,12 @@ from unstract.prompt_service.utils.env_loader import get_env_or_die # Load required environment variables -db_host = get_env_or_die("DB_HOST") -db_port = get_env_or_die("DB_PORT") -db_user = get_env_or_die("DB_USER") -db_pass = get_env_or_die("DB_PASSWORD") -db_name = get_env_or_die("DB_NAME") +# New names with fallback to legacy PG_BE_* names for rolling deploys +db_host = get_env_or_die("DB_HOST", env.get("PG_BE_HOST")) +db_port = get_env_or_die("DB_PORT", env.get("PG_BE_PORT")) +db_user = get_env_or_die("DB_USER", env.get("PG_BE_USERNAME")) +db_pass = get_env_or_die("DB_PASSWORD", env.get("PG_BE_PASSWORD")) +db_name = get_env_or_die("DB_NAME", env.get("PG_BE_DATABASE")) application_name = env.get("APPLICATION_NAME", "unstract-prompt-service") # Initialize and connect to the database diff --git a/run-platform.sh b/run-platform.sh index f8cc375566..40dfae7064 100755 --- a/run-platform.sh +++ b/run-platform.sh @@ -238,8 +238,7 @@ setup_env() { fi done - copy_or_merge_envs "$script_dir/docker/sample.common.env" "$script_dir/docker/.env" "common services" - copy_or_merge_envs "$script_dir/docker/sample.env" "$script_dir/docker/.env" "docker compose" + copy_or_merge_envs "$script_dir/docker/sample.env" "$script_dir/docker/.env" "docker services" if [ "$opt_only_env" = true ]; then diff --git a/runner/sample.env b/runner/sample.env index 1b31d6f278..bcd892192c 100644 --- a/runner/sample.env +++ b/runner/sample.env @@ -1,8 +1,8 @@ # ----------------------------------------------------------------------------- # LOCAL DEVELOPMENT NOTE: # Shared env vars (DB, Redis, RabbitMQ, 
Flipt, service URLs, MinIO, timeouts) -# live in docker/sample.common.env. When running outside Docker, copy those vars -# here and replace container hostnames with localhost (see common.env header). +# live in docker/sample.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see docker/sample.env header). # ----------------------------------------------------------------------------- TOOL_CONTAINER_NETWORK="unstract-network" diff --git a/workers/sample.env b/workers/sample.env index 6ebfc9c92f..74cda9e92b 100644 --- a/workers/sample.env +++ b/workers/sample.env @@ -4,12 +4,12 @@ # Copy this file to .env and update the values for your environment. # # WARNING: This file is NOT runnable by itself. -# It must be combined with docker/sample.common.env (copied as docker/.env), +# It must be combined with docker/sample.env (copied as docker/.env), # which provides required shared vars: DB_*, REDIS_*, CELERY_BROKER_*, # and other infrastructure settings. Missing these will cause startup failures. # # For local dev outside Docker, copy shared vars here and replace -# container hostnames with localhost (see docker/sample.common.env header). +# container hostnames with localhost (see docker/sample.env header). # ============================================================================= # Core Configuration (REQUIRED) @@ -54,11 +54,11 @@ INTERNAL_API_ORGANIZATION_PREFIX=v1/organization/ # Celery Backend Database Schema CELERY_BACKEND_DB_SCHEMA=public -# Redis DB index (general Redis settings come from docker/sample.common.env) +# Redis DB index (general Redis settings come from docker/sample.env) REDIS_DB=0 # Cache-Specific Redis Configuration (worker-specific overrides) -# These override the base REDIS_* vars from common.env for worker cache operations. +# These override the base REDIS_* vars from docker/sample.env for worker cache operations. # For Docker: use unstract-redis. For local dev: use localhost. 
CACHE_REDIS_ENABLED=true CACHE_REDIS_HOST=unstract-redis @@ -318,10 +318,14 @@ GOOGLE_OAUTH2_SECRET= +# LLMWhisperer async extraction settings +ADAPTER_LLMW_POLL_INTERVAL=30 +ADAPTER_LLMW_MAX_POLLS=1000 + # ============================================================================= # Local Development Overrides # ============================================================================= -# For local development (all services on host), override vars from common.env: +# For local development (all services on host), override vars from docker/sample.env: # DJANGO_APP_BACKEND_URL=http://localhost:8000 # INTERNAL_API_BASE_URL=http://localhost:8000/internal # CACHE_REDIS_HOST=localhost diff --git a/workers/shared/infrastructure/config/worker_config.py b/workers/shared/infrastructure/config/worker_config.py index 328a55597a..4a2cab2f41 100644 --- a/workers/shared/infrastructure/config/worker_config.py +++ b/workers/shared/infrastructure/config/worker_config.py @@ -373,7 +373,7 @@ def __post_init__(self): ) logging.info( "To fix this, ensure all required environment variables are set. " - "See docker/sample.common.env for shared vars and workers/sample.env for worker-specific vars" + "See docker/sample.env for shared vars and workers/sample.env for worker-specific vars" ) def _build_cache_redis_url(self): @@ -428,7 +428,7 @@ def validate(self): "CELERY_BROKER_URL could not be built. Please set the following environment variables: " "CELERY_BROKER_BASE_URL (e.g., 'amqp://unstract-rabbitmq:5672//'), " "CELERY_BROKER_USER, and CELERY_BROKER_PASS. " - "See docker/sample.common.env for these shared vars." + "See docker/sample.env for these shared vars." ) if not self.celery_result_backend: @@ -436,7 +436,7 @@ def validate(self): "CELERY_RESULT_BACKEND could not be built. Please set the following environment variables: " "DB_HOST, DB_USER, DB_PASSWORD, DB_NAME, and DB_PORT. " "These are required for Celery to store task results. 
" - "See docker/sample.common.env for these shared vars." + "See docker/sample.env for these shared vars." ) # Cache Redis validation diff --git a/x2text-service/app/env.py b/x2text-service/app/env.py index fb197cec1d..b29c4914a4 100644 --- a/x2text-service/app/env.py +++ b/x2text-service/app/env.py @@ -48,7 +48,10 @@ class Env: DB_SCHEMA = EnvManager.get_required_setting("DB_SCHEMA") DB_HOST = EnvManager.get_required_setting("DB_HOST") DB_PORT = int(EnvManager.get_required_setting("DB_PORT", 5432)) - DB_USER = EnvManager.get_required_setting("DB_USER") + # New name with fallback to legacy DB_USERNAME for rolling deploys + DB_USER = EnvManager.get_required_setting( + "DB_USER", os.environ.get("DB_USERNAME") + ) DB_PASSWORD = EnvManager.get_required_setting("DB_PASSWORD") DB_NAME = EnvManager.get_required_setting("DB_NAME") diff --git a/x2text-service/sample.env b/x2text-service/sample.env index 4f178b1499..8550ec652f 100644 --- a/x2text-service/sample.env +++ b/x2text-service/sample.env @@ -1,8 +1,8 @@ # ----------------------------------------------------------------------------- # LOCAL DEVELOPMENT NOTE: # Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) -# live in docker/sample.common.env. When running outside Docker, copy those vars -# here and replace container hostnames with localhost (see common.env header). +# live in docker/sample.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see docker/sample.env header). 
# ----------------------------------------------------------------------------- FLASK_ENV=production From 34f69a84d0da14426ac082e90e208ce1a3a63cbd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 11 Mar 2026 02:16:39 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../src/unstract/platform_service/env.py | 16 ++++------------ x2text-service/app/env.py | 4 +--- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/platform-service/src/unstract/platform_service/env.py b/platform-service/src/unstract/platform_service/env.py index bf2641cf2d..2d37fa90be 100644 --- a/platform-service/src/unstract/platform_service/env.py +++ b/platform-service/src/unstract/platform_service/env.py @@ -14,23 +14,15 @@ class Env: REDIS_USER = os.environ.get("REDIS_USER") or os.environ.get("REDIS_USERNAME") REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") # DB vars: new names with fallback to legacy PG_BE_* names for rolling deploys - DB_HOST = EnvManager.get_required_setting( - "DB_HOST", os.environ.get("PG_BE_HOST") - ) + DB_HOST = EnvManager.get_required_setting("DB_HOST", os.environ.get("PG_BE_HOST")) DB_PORT = int( - EnvManager.get_required_setting( - "DB_PORT", os.environ.get("PG_BE_PORT", "5432") - ) - ) - DB_USER = EnvManager.get_required_setting( - "DB_USER", os.environ.get("PG_BE_USERNAME") + EnvManager.get_required_setting("DB_PORT", os.environ.get("PG_BE_PORT", "5432")) ) + DB_USER = EnvManager.get_required_setting("DB_USER", os.environ.get("PG_BE_USERNAME")) DB_PASSWORD = EnvManager.get_required_setting( "DB_PASSWORD", os.environ.get("PG_BE_PASSWORD") ) - DB_NAME = EnvManager.get_required_setting( - "DB_NAME", os.environ.get("PG_BE_DATABASE") - ) + DB_NAME = EnvManager.get_required_setting("DB_NAME", os.environ.get("PG_BE_DATABASE")) ENCRYPTION_KEY = EnvManager.get_required_setting("ENCRYPTION_KEY") MODEL_PRICES_URL = 
EnvManager.get_required_setting("MODEL_PRICES_URL") MODEL_PRICES_TTL_IN_DAYS = int( diff --git a/x2text-service/app/env.py b/x2text-service/app/env.py index b29c4914a4..c379217e98 100644 --- a/x2text-service/app/env.py +++ b/x2text-service/app/env.py @@ -49,9 +49,7 @@ class Env: DB_HOST = EnvManager.get_required_setting("DB_HOST") DB_PORT = int(EnvManager.get_required_setting("DB_PORT", 5432)) # New name with fallback to legacy DB_USERNAME for rolling deploys - DB_USER = EnvManager.get_required_setting( - "DB_USER", os.environ.get("DB_USERNAME") - ) + DB_USER = EnvManager.get_required_setting("DB_USER", os.environ.get("DB_USERNAME")) DB_PASSWORD = EnvManager.get_required_setting("DB_PASSWORD") DB_NAME = EnvManager.get_required_setting("DB_NAME")