diff --git a/backend/sample.env b/backend/sample.env index 377016fdec..743a31de8e 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -64,7 +64,9 @@ SESSION_EXPIRATION_TIME_IN_SECOND=7200 WEB_APP_ORIGIN_URL="http://frontend.unstract.localhost" # API keys for trusted services -INTERNAL_SERVICE_API_KEY= +# Workers send this as X-API-Key on internal calls to backend. +# Must match workers/sample.env INTERNAL_SERVICE_API_KEY; rotate in both for prod. +INTERNAL_SERVICE_API_KEY=dev-internal-key-123 # Unstract Core envs BUILTIN_FUNCTIONS_API_KEY= @@ -199,7 +201,8 @@ API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_ur #Remote storage related envs PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data" +TEMPORARY_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +REMOTE_PROMPT_STUDIO_FILE_PATH=unstract/prompt-studio-data # Storage Provider for Tool registry TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}' diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index b7b49a7618..5884b88994 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -2,9 +2,21 @@ name: ${COMPOSE_PROJECT_NAME:-docker} include: - docker-compose-dev-essentials.yaml +# Reusable host-gateway mapping so containers can reach services on the host +# (e.g. host-installed Ollama at http://host.docker.internal:11434). +# NOTE: YAML merge (<<:) merges mappings shallowly and does NOT concatenate +# lists. If a service ever needs additional extra_hosts entries alongside +# this one, inline ALL entries under that service (including +# host.docker.internal:host-gateway) instead of using the anchor — a sibling +# extra_hosts key would silently shadow the anchor's list. +x-host-gateway: &host_gateway + extra_hosts: + - "host.docker.internal:host-gateway" + services: # Backend service backend: + <<: *host_gateway image: unstract/backend:${VERSION} container_name: unstract-backend restart: unless-stopped @@ -34,12 +46,10 @@ services: - traefik.enable=true - traefik.http.routers.backend.rule=Host(`frontend.unstract.localhost`) && (PathPrefix(`/api/v1`) || PathPrefix(`/deployment`) || PathPrefix(`/public`)) - traefik.http.services.backend.loadbalancer.server.port=8000 - extra_hosts: - # "host-gateway" is a special string that translates to host docker0 i/f IP. - - "host.docker.internal:host-gateway" # Celery worker for dashboard metrics processing worker-metrics: + <<: *host_gateway image: unstract/backend:${VERSION} container_name: unstract-worker-metrics restart: unless-stopped @@ -61,6 +71,7 @@ services: # Processes post-execution callbacks via InternalAPIClient (no Django). # Handles: ide_index_complete/error, ide_prompt_complete/error. worker-ide-callback: + <<: *host_gateway image: unstract/worker-unified:${VERSION} container_name: unstract-worker-ide-callback restart: unless-stopped @@ -82,6 +93,7 @@ services: # Celery Flower celery-flower: + <<: *host_gateway image: unstract/backend:${VERSION} container_name: unstract-celery-flower restart: unless-stopped @@ -105,6 +117,7 @@ services: # Celery Beat celery-beat: + <<: *host_gateway image: unstract/backend:${VERSION} container_name: unstract-celery-beat restart: unless-stopped @@ -152,6 +165,7 @@ services: - traefik.enable=false prompt-service: + <<: *host_gateway image: unstract/prompt-service:${VERSION} container_name: unstract-prompt-service restart: unless-stopped @@ -166,9 +180,6 @@ services: - ../prompt-service/.env labels: - traefik.enable=false - extra_hosts: - # "host-gateway" is a special string that translates to host docker0 i/f IP. - - "host.docker.internal:host-gateway" x2text-service: image: unstract/x2text-service:${VERSION} @@ -184,6 +195,7 @@ services: - traefik.enable=false runner: + <<: *host_gateway image: unstract/runner:${VERSION} container_name: unstract-runner restart: unless-stopped @@ -206,6 +218,7 @@ services: # ==================================================================== worker-api-deployment-v2: + <<: *host_gateway image: unstract/worker-unified:${VERSION} container_name: unstract-worker-api-deployment-v2 restart: unless-stopped @@ -238,6 +251,7 @@ services: - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config worker-callback-v2: + <<: *host_gateway image: unstract/worker-unified:${VERSION} container_name: unstract-worker-callback-v2 restart: unless-stopped @@ -264,6 +278,7 @@ services: - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config worker-file-processing-v2: + <<: *host_gateway image: unstract/worker-unified:${VERSION} container_name: unstract-worker-file-processing-v2 restart: unless-stopped @@ -316,6 +331,7 @@ services: - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config worker-general-v2: + <<: *host_gateway image: unstract/worker-unified:${VERSION} container_name: unstract-worker-general-v2 restart: unless-stopped @@ -343,6 +359,7 @@ services: - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config worker-notification-v2: + <<: *host_gateway image: unstract/worker-unified:${VERSION} container_name: unstract-worker-notification-v2 restart: unless-stopped @@ -391,6 +408,7 @@ services: - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config worker-log-consumer-v2: + <<: *host_gateway image: unstract/worker-unified:${VERSION} container_name: unstract-worker-log-consumer-v2 restart: unless-stopped @@ -440,6 +458,7 @@ services: - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config worker-log-history-scheduler-v2: + <<: *host_gateway image: unstract/worker-unified:${VERSION} container_name: unstract-worker-log-history-scheduler-v2 restart: unless-stopped @@ -463,6 +482,7 @@ services: - traefik.enable=false worker-scheduler-v2: + <<: *host_gateway image: unstract/worker-unified:${VERSION} container_name: unstract-worker-scheduler-v2 restart: unless-stopped @@ -507,6 +527,7 @@ services: - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config worker-executor-v2: + <<: *host_gateway image: unstract/worker-unified:${VERSION} container_name: unstract-worker-executor-v2 restart: unless-stopped diff --git a/run-platform.sh b/run-platform.sh index a2b793a131..40181806f8 100755 --- a/run-platform.sh +++ b/run-platform.sh @@ -33,6 +33,28 @@ check_dependencies() { echo "$red_text""docker not found. Exiting.""$default_text" exit 1 fi + if ! docker info >/dev/null 2>&1; then + echo "$red_text""Cannot connect to the Docker daemon.""$default_text" + case "$(uname -s)" in + Linux*) + echo " On Linux (daemon access via the 'docker' group):" + echo " - Check group membership: getent group docker" + echo " - Add your user to it: sudo usermod -aG docker \$USER" + echo " - Activate in current shell: newgrp docker" + echo " - For new shells, a full desktop logout (not just terminal close) is required." + ;; + Darwin*) + echo " On macOS: ensure Docker Desktop is running (whale icon in the menu bar)." + ;; + MINGW*|MSYS*|CYGWIN*) + echo " On Windows: ensure Docker Desktop is running and WSL integration is enabled if applicable." + ;; + *) + echo " Ensure the Docker daemon is running and your user can reach its socket." + ;; + esac + exit 1 + fi # For 'docker compose' vs 'docker-compose', see https://stackoverflow.com/a/66526176. docker compose >/dev/null 2>&1 if [ $? -eq 0 ]; then diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json index c241e5e711..e5434a095e 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json @@ -23,7 +23,7 @@ "type": "string", "title": "Base URL", "default": "", - "description": "Provide the base URL where Ollama server is running. Example: `http://docker.host.internal:11434` or `http://localhost:11434`" + "description": "Provide the base URL where Ollama server is running. Example: `http://host.docker.internal:11434` or `http://localhost:11434`" }, "max_retries": { "type": "number", diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/ollama.json b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/ollama.json index 3c8a4a5f16..8f2b9db7a1 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/ollama.json +++ b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/ollama.json @@ -23,7 +23,7 @@ "type": "string", "title": "Base URL", "default": "", - "description": "Provide the base URL where Ollama server is running. Example: http://docker.host.internal:11434 or http://localhost:11434" + "description": "Provide the base URL where Ollama server is running. Example: http://host.docker.internal:11434 or http://localhost:11434" }, "max_tokens": { "type": "number", diff --git a/unstract/sdk1/src/unstract/sdk1/file_storage/env_helper.py b/unstract/sdk1/src/unstract/sdk1/file_storage/env_helper.py index 22b5210942..830e7a1c34 100644 --- a/unstract/sdk1/src/unstract/sdk1/file_storage/env_helper.py +++ b/unstract/sdk1/src/unstract/sdk1/file_storage/env_helper.py @@ -31,22 +31,42 @@ def get_storage(storage_type: StorageType, env_name: str) -> FileStorage: FileStorage: FIleStorage instance initialised using the provider and credentials configured in the env """ + raw = os.environ.get(env_name) + if not raw: + raise FileStorageError( + f"Required env var '{env_name}' is unset or empty. " + f"Expected JSON config of the form: {EnvHelper.ENV_CONFIG_FORMAT}" + ) + try: + file_storage_creds = json.loads(raw) + except json.JSONDecodeError as e: + raise FileStorageError( + f"Env var '{env_name}' is not valid JSON: {e}. " + f"Expected: {EnvHelper.ENV_CONFIG_FORMAT}" + ) from e + if not isinstance(file_storage_creds, dict): + raise FileStorageError( + f"Env var '{env_name}' must be a JSON object. " + f"Expected: {EnvHelper.ENV_CONFIG_FORMAT}" + ) try: - file_storage_creds = json.loads(os.environ.get(env_name, "")) provider = FileStorageProvider(file_storage_creds[CredentialKeyword.PROVIDER]) - credentials = file_storage_creds.get(CredentialKeyword.CREDENTIALS, {}) - if storage_type == StorageType.PERMANENT: - file_storage = PermanentFileStorage(provider=provider, **credentials) - elif storage_type == StorageType.SHARED_TEMPORARY: - file_storage = SharedTemporaryFileStorage( - provider=provider, **credentials - ) - else: - raise NotImplementedError() - return file_storage - except KeyError as e: - logger.error(f"Required credentials are missing in the env: {str(e)}") + except (KeyError, ValueError) as e: + logger.error(f"Invalid storage configuration in env: {str(e)}") logger.error(f"The configuration format is {EnvHelper.ENV_CONFIG_FORMAT}") - raise e - except FileStorageError as e: - raise e + raise FileStorageError( + f"Invalid storage configuration in env var '{env_name}': {e}. " + f"Expected: {EnvHelper.ENV_CONFIG_FORMAT}" + ) from e + credentials = file_storage_creds.get(CredentialKeyword.CREDENTIALS, {}) + if not isinstance(credentials, dict): + raise FileStorageError( + f"Env var '{env_name}' field '{CredentialKeyword.CREDENTIALS}' " + f"must be a JSON object. Expected: {EnvHelper.ENV_CONFIG_FORMAT}" + ) + if storage_type == StorageType.PERMANENT: + return PermanentFileStorage(provider=provider, **credentials) + elif storage_type == StorageType.SHARED_TEMPORARY: + return SharedTemporaryFileStorage(provider=provider, **credentials) + else: + raise NotImplementedError() diff --git a/workers/sample.env b/workers/sample.env index 4764fb0c5a..7d9a70721f 100644 --- a/workers/sample.env +++ b/workers/sample.env @@ -316,6 +316,12 @@ UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +# Remote storage for Prompt Studio / IDE flows. Must match backend/sample.env. +# Required by executor and ide-callback workers; missing/empty values raise FileStorageError in EnvHelper.get_storage(). +PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +TEMPORARY_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +REMOTE_PROMPT_STUDIO_FILE_PATH=unstract/prompt-studio-data + # File Execution Configuration WORKFLOW_EXECUTION_DIR_PREFIX=unstract/execution API_EXECUTION_DIR_PREFIX=unstract/api