Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 31 additions & 9 deletions bin/single-node/.env
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

# Public host and ports exposed by the deployment
TEXERA_HOST=http://localhost
TEXERA_PORT=8080
MINIO_PORT=9000
Expand All @@ -27,37 +28,58 @@ TEXERA_SERVICE_LOG_LEVEL=INFO
IMAGE_REGISTRY=ghcr.io/apache
IMAGE_TAG=latest

# Admin credentials for Texera
USER_SYS_ADMIN_USERNAME=texera
USER_SYS_ADMIN_PASSWORD=texera

# Postgres root credentials
POSTGRES_USER=texera
POSTGRES_PASSWORD=password

MINIO_ROOT_USER=texera_minio
MINIO_ROOT_PASSWORD=password
# S3 (MinIO) credentials
STORAGE_S3_AUTH_USERNAME=texera_minio
STORAGE_S3_AUTH_PASSWORD=password

# LakeFS server configuration
LAKEFS_INSTALLATION_USER_NAME=texera-admin
LAKEFS_INSTALLATION_ACCESS_KEY_ID=AKIAIOSFOLKFSSAMPLES
LAKEFS_INSTALLATION_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
LAKEFS_BLOCKSTORE_TYPE=s3
LAKEFS_BLOCKSTORE_S3_FORCE_PATH_STYLE=true
LAKEFS_BLOCKSTORE_S3_ENDPOINT=http://texera-minio:9000
LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=texera_minio
LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=password
LAKEFS_AUTH_ENCRYPT_SECRET_KEY=random_string_for_lakefs
LAKEFS_LOGGING_LEVEL=INFO
LAKEFS_STATS_ENABLED=1
LAKEFS_DATABASE_TYPE=postgres
LAKEFS_DATABASE_POSTGRES_CONNECTION_STRING=postgres://texera:password@texera-postgres:5432/texera_lakefs?sslmode=disable

# Lakekeeper server configuration
# The read and write URLs both point at the texera_lakekeeper database on
# texera-postgres and embed the same user/password literally as
# POSTGRES_USER / POSTGRES_PASSWORD; update them together if those change.
LAKEKEEPER__PG_DATABASE_URL_READ=postgres://texera:password@texera-postgres:5432/texera_lakekeeper
LAKEKEEPER__PG_DATABASE_URL_WRITE=postgres://texera:password@texera-postgres:5432/texera_lakekeeper
# NOTE(review): placeholder key committed in plain text; replace it before any
# non-local deployment.
LAKEKEEPER__PG_ENCRYPTION_KEY=texera_key
# Base URL that the lakekeeper-init script in docker-compose.yml uses to call
# the /management/v1 endpoints.
LAKEKEEPER_BASE_URI=http://texera-lakekeeper:8181

# Texera storage endpoints
STORAGE_S3_ENDPOINT=http://texera-minio:9000
STORAGE_S3_REGION=us-west-2
STORAGE_LAKEFS_ENDPOINT=http://texera-lakefs:8000/api/v1
STORAGE_JDBC_URL=jdbc:postgresql://texera-postgres:5432/texera_db?currentSchema=texera_db,public
STORAGE_JDBC_USERNAME=texera
STORAGE_JDBC_PASSWORD=password
FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT=http://file-service:9092/api/dataset/presign-download
FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT=http://file-service:9092/api/dataset/did/upload

# Iceberg catalog selector (valid values: rest, postgres)
STORAGE_ICEBERG_CATALOG_TYPE=rest

# Iceberg REST catalog client configuration
# The URI targets the same texera-lakekeeper:8181 host as LAKEKEEPER_BASE_URI.
# The warehouse name and bucket are also read by the compose init containers:
# minio-init creates the bucket, and lakekeeper-init registers a warehouse with
# this name on that bucket.
STORAGE_ICEBERG_CATALOG_REST_URI=http://texera-lakekeeper:8181/catalog
STORAGE_ICEBERG_CATALOG_REST_WAREHOUSE_NAME=texera
STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET=texera-iceberg

# Postgres-backed Iceberg catalog
# NOTE(review): presumably only consulted when STORAGE_ICEBERG_CATALOG_TYPE=postgres;
# confirm against the service code. Credentials repeat POSTGRES_USER / POSTGRES_PASSWORD.
STORAGE_ICEBERG_CATALOG_POSTGRES_URI_WITHOUT_SCHEME=texera-postgres:5432/texera_iceberg_catalog
STORAGE_ICEBERG_CATALOG_POSTGRES_USERNAME=texera
STORAGE_ICEBERG_CATALOG_POSTGRES_PASSWORD=password

# Admin credentials for Texera (used for login and example data loading)
USER_SYS_ADMIN_USERNAME=texera
USER_SYS_ADMIN_PASSWORD=texera
# File service endpoints
FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT=http://file-service:9092/api/dataset/presign-download
FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT=http://file-service:9092/api/dataset/did/upload
160 changes: 160 additions & 0 deletions bin/single-node/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,37 @@ services:
- "${MINIO_PORT:-9000}:9000"
env_file:
- .env
environment:
- MINIO_ROOT_USER=${STORAGE_S3_AUTH_USERNAME}
- MINIO_ROOT_PASSWORD=${STORAGE_S3_AUTH_PASSWORD}
volumes:
- minio_data:/data
command: server --console-address ":9001" /data
healthcheck:
test: ["CMD", "curl", "-sf", "http://localhost:9000/minio/health/live"]
interval: 5s
timeout: 3s
retries: 10

# One-shot init container that creates the Iceberg warehouse bucket on first
# startup. MinIO's server image has no built-in "create bucket on boot" flag,
# so this sidecar uses the official `mc` client to do it idempotently.
minio-init:
image: minio/mc:RELEASE.2025-05-21T01-59-54Z
container_name: texera-minio-init
# Start only after the minio service's healthcheck reports healthy.
depends_on:
minio:
condition: service_healthy
# Provides STORAGE_S3_ENDPOINT, STORAGE_S3_AUTH_* and
# STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET to the script below.
env_file:
- .env
# Run to completion once; do not restart after exit.
restart: "no"
# The image is driven through a shell so several mc commands can be chained.
entrypoint: ["/bin/sh", "-c"]
# `$$` is Compose's escape for a literal `$`, so the variables are expanded by
# the container's shell at run time rather than by Compose interpolation.
# `set -e` aborts on the first failing command; `mc mb --ignore-existing`
# makes re-runs succeed when the bucket is already present.
command:
- |
set -e
mc alias set local "$$STORAGE_S3_ENDPOINT" "$$STORAGE_S3_AUTH_USERNAME" "$$STORAGE_S3_AUTH_PASSWORD"
mc mb --ignore-existing "local/$$STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET"
echo "MinIO bucket '$$STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET' is ready."

# PostgreSQL with PGroonga extension for full-text search.
# Used by lakeFS, Lakekeeper, and Texera's metadata storage.
Expand Down Expand Up @@ -63,6 +91,8 @@ services:
environment:
# This port also needs to be changed if the port of the MinIO service is changed
- LAKEFS_BLOCKSTORE_S3_PRE_SIGNED_ENDPOINT=${TEXERA_HOST}:${MINIO_PORT:-9000}
- LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID=${STORAGE_S3_AUTH_USERNAME}
- LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY=${STORAGE_S3_AUTH_PASSWORD}
entrypoint: ["/bin/sh", "-c"]
command:
- |
Expand All @@ -75,6 +105,132 @@ services:
timeout: 5s
retries: 10

# Lakekeeper migration init container
# This runs once to migrate the database before the lakekeeper server starts.
# The `lakekeeper` service waits for this container to exit successfully.
lakekeeper-migrate:
# Same image tag as the `lakekeeper` service; bump both together.
image: vakamo/lakekeeper:v0.11.0
container_name: texera-lakekeeper-migrate
# The migration needs a reachable database, so wait for Postgres to be healthy.
depends_on:
postgres:
condition: service_healthy
# Supplies the LAKEKEEPER__PG_* connection settings defined in .env.
env_file:
- .env
# Run to completion once; do not restart after exit.
restart: "no"
# Invoke the lakekeeper binary directly with its `migrate` subcommand.
entrypoint: ["/home/nonroot/lakekeeper"]
command: ["migrate"]

# Lakekeeper is the Iceberg REST catalog service
# No host port is published in this block; other containers reach it by the
# texera-lakekeeper host name used in .env.
lakekeeper:
# Same image tag as `lakekeeper-migrate`; bump both together.
image: vakamo/lakekeeper:v0.11.0
container_name: texera-lakekeeper
restart: always
# Startup order: Postgres must be healthy, MinIO only needs to have started,
# and the schema migration must have finished with exit code 0.
depends_on:
postgres:
condition: service_healthy
minio:
condition: service_started
lakekeeper-migrate:
condition: service_completed_successfully
# Supplies the LAKEKEEPER__PG_* settings defined in .env.
env_file:
- .env
# Invoke the lakekeeper binary directly with its `serve` subcommand.
entrypoint: ["/home/nonroot/lakekeeper"]
command: ["serve"]
# Uses the binary's own `healthcheck` subcommand. Dependants such as
# lakekeeper-init gate on this with `condition: service_healthy`.
healthcheck:
test: ["CMD", "/home/nonroot/lakekeeper", "healthcheck"]
interval: 10s
timeout: 5s
retries: 10
start_period: 10s

# One-shot init container that creates the Lakekeeper default project and
# the Iceberg warehouse pointing at the MinIO bucket prepared by minio-init.
# Both steps treat "already exists" responses as success, so the container can
# be re-run safely on every `docker compose up`.
lakekeeper-init:
image: alpine:3.19
container_name: texera-lakekeeper-init
# Needs a healthy Lakekeeper API and the bucket created by minio-init.
depends_on:
lakekeeper:
condition: service_healthy
minio-init:
condition: service_completed_successfully
# Supplies LAKEKEEPER_BASE_URI, STORAGE_S3_* and STORAGE_ICEBERG_CATALOG_REST_*.
env_file:
- .env
# Run to completion once; do not restart after exit.
restart: "no"
entrypoint: [ "/bin/sh", "-c" ]
# `$$` is Compose's escape for a literal `$`: every variable below is expanded
# by the container's shell at run time, not by Compose interpolation.
command:
- |
set -e

# NOTE(review): the base alpine image ships without curl, so this step
# needs network access to the package mirror at container start.
echo "Installing dependencies..."
apk add --no-cache curl ca-certificates

# check_status HTTP_CODE LABEL
# Succeeds on 2xx and on 409 (resource already exists); otherwise prints
# the saved response body, if any, and aborts the script.
check_status() {
if [ "$$1" -ge 200 ] && [ "$$1" -lt 300 ]; then
echo "Created $$2 successfully (HTTP $$1)."
elif [ "$$1" -eq 409 ]; then
echo "$$2 already exists (HTTP 409). Treating as success."
else
echo "Failed to create $$2. HTTP Code: $$1"
echo "ERROR RESPONSE:"
if [ -f /tmp/response.txt ]; then cat /tmp/response.txt; fi
echo ""
exit 1
fi
}

echo "Step 1: Initializing Default Project..."
PROJECT_PAYLOAD='{"project-id": "00000000-0000-0000-0000-000000000000", "project-name": "default"}'

# Clear any previous body so a transport failure never prints stale output.
rm -f /tmp/response.txt
# curl's -w "%{http_code}" already prints 000 when no HTTP response was
# received, so a failure is only neutralised with `|| true` (keeping
# `set -e` from aborting before the diagnostic) instead of appending a
# second "000" to the captured value.
PROJECT_CODE=$$(curl -s -o /tmp/response.txt -w "%{http_code}" \
-X POST \
-H "Content-Type: application/json" \
-d "$$PROJECT_PAYLOAD" \
"$$LAKEKEEPER_BASE_URI/management/v1/project" || true)
# Fall back to 000 if curl produced no output at all.
PROJECT_CODE="$${PROJECT_CODE:-000}"

check_status "$$PROJECT_CODE" "Default Project"


echo "Step 2: Initializing Warehouse '$$STORAGE_ICEBERG_CATALOG_REST_WAREHOUSE_NAME'..."
# The heredoc delimiter is unquoted, so the shell substitutes the variables
# into the JSON. NOTE(review): values are inserted without JSON escaping;
# a credential containing `"` or `\` would produce an invalid payload.
CREATE_PAYLOAD=$$(cat <<EOF
{
"warehouse-name": "$$STORAGE_ICEBERG_CATALOG_REST_WAREHOUSE_NAME",
"project-id": "00000000-0000-0000-0000-000000000000",
"storage-profile": {
"type": "s3",
"bucket": "$$STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET",
"region": "$$STORAGE_S3_REGION",
"endpoint": "$$STORAGE_S3_ENDPOINT",
"flavor": "s3-compat",
"path-style-access": true,
"sts-enabled": false
},
"storage-credential": {
"type": "s3",
"credential-type": "access-key",
"aws-access-key-id": "$$STORAGE_S3_AUTH_USERNAME",
"aws-secret-access-key": "$$STORAGE_S3_AUTH_PASSWORD"
}
}
EOF
)

# Drop the project response so only this request's body can be reported.
rm -f /tmp/response.txt
WAREHOUSE_CODE=$$(curl -s -o /tmp/response.txt -w "%{http_code}" \
-X POST \
-H "Content-Type: application/json" \
-d "$$CREATE_PAYLOAD" \
"$$LAKEKEEPER_BASE_URI/management/v1/warehouse" || true)
WAREHOUSE_CODE="$${WAREHOUSE_CODE:-000}"

# Lakekeeper returns 400 CreateWarehouseStorageProfileOverlap when a
# warehouse with the same name + storage profile already exists.
# Treat that as success so this init is idempotent across restarts.
if [ "$$WAREHOUSE_CODE" = "400" ] && grep -q "CreateWarehouseStorageProfileOverlap" /tmp/response.txt 2>/dev/null; then
echo "Lakekeeper Warehouse already exists (storage profile overlap). Treating as success."
else
check_status "$$WAREHOUSE_CODE" "Lakekeeper Warehouse"
fi

echo "Initialization sequence completed successfully!"


# Part2: Specification of Texera's micro-services
# FileService provides endpoints for Texera's dataset management
file-service:
Expand Down Expand Up @@ -166,6 +322,10 @@ services:
depends_on:
workflow-compiling-service:
condition: service_started
lakekeeper:
condition: service_healthy
lakekeeper-init:
condition: service_completed_successfully
env_file:
- .env
volumes:
Expand Down
Loading