Upgrade the image to Airflow 1.10.12, default config/airflow.cfg to the Celery
executor with the RBAC web UI, and install from the 163.com apt mirror and the
Aliyun PyPI mirror.

Review amendments relative to the patch as submitted:
  * script/entrypoint.sh keeps the ${EXECUTOR:-Sequential} fallback. Without it
    an unset EXECUTOR produces the executor name "Executor", which matches none
    of the executor comparisons made later in the script.
  * The debugging "echo ... >> a.log" lines are dropped. They appended the
    Fernet key and the SQLAlchemy connection string (which embeds
    POSTGRES_PASSWORD) to a file.
  * The admin account passed to "airflow create_user" can be overridden with
    AIRFLOW_ADMIN_* environment variables; the fallbacks are the values that
    were hard-coded before.
  * docker-compose-LocalExecutor.yml uses the same movingheart image as
    docker-compose-CeleryExecutor.yml.
  * The README sentence introducing the forced rebuild command is reworded.
  * "index" lines are omitted for the three files whose post-image differs from
    the submitted patch, because their blob ids would no longer match.
  * Indentation and blank context lines were reconstructed from the hunk line
    counts; verify them against the target tree before applying.

diff --git a/Dockerfile b/Dockerfile
index 02782d0c..c3250622 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive
 ENV TERM linux
 
 # Airflow
-ARG AIRFLOW_VERSION=1.10.9
+ARG AIRFLOW_VERSION=1.10.12
 ARG AIRFLOW_USER_HOME=/usr/local/airflow
 ARG AIRFLOW_DEPS=""
 ARG PYTHON_DEPS=""
@@ -27,6 +27,9 @@ ENV LC_MESSAGES en_US.UTF-8
 
 # Disable noisy "Handling signal" log messages:
 # ENV GUNICORN_CMD_ARGS --log-level WARNING
+# use source internal
+COPY debian.source.txt /usr/local/debian.source.txt
+RUN cat /usr/local/debian.source.txt > /etc/apt/sources.list
 
 RUN set -ex \
     && buildDeps=' \
@@ -60,7 +63,8 @@ RUN set -ex \
     && pip install ndg-httpsclient \
     && pip install pyasn1 \
     && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \
-    && pip install 'redis==3.2' \
+        -i https://mirrors.aliyun.com/pypi/simple/ \
+    && pip install 'redis==3.2' -i https://mirrors.aliyun.com/pypi/simple/ \
     && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi \
     && apt-get purge --auto-remove -yqq $buildDeps \
     && apt-get autoremove -yqq --purge \
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -107,6 +107,10 @@ Easy scaling using docker-compose:
 
 This can be used to scale to a multi node setup using docker swarm.
 
+Alternatively, scale the workers while rebuilding the images and recreating the containers:
+
+    docker-compose -f docker-compose-CeleryExecutor.yml up --scale worker=3 --force-recreate --build
+
 ## Running other airflow commands
 
 If you want to run other airflow sub-commands, such as `list_dags` or `clear` you can do so like this:
diff --git a/config/airflow.cfg b/config/airflow.cfg
index 9e4d5229..d52a8793 100644
--- a/config/airflow.cfg
+++ b/config/airflow.cfg
@@ -66,7 +66,7 @@ default_timezone = utc
 
 # The executor class that airflow should use. Choices include
 # SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor, KubernetesExecutor
-executor = SequentialExecutor
+executor = CeleryExecutor
 
 # The SqlAlchemy connection string to the metadata database.
 # SqlAlchemy supports many different database engine, more information
@@ -301,7 +301,7 @@ expose_stacktrace = True
 
 # Set to true to turn on authentication:
 # https://airflow.apache.org/security.html#web-authentication
-authenticate = False
+authenticate = True
 
 # Filter the list of dags by owner name (requires authentication to be enabled)
 filter_by_owner = False
@@ -346,7 +346,7 @@ hide_paused_dags_by_default = False
 page_size = 100
 
 # Use FAB-based webserver with RBAC feature
-rbac = False
+rbac = True
 
 # Define the color of navigation bar
 navbar_color = #007A87
diff --git a/debian.source.txt b/debian.source.txt
new file mode 100644
index 00000000..2900505f
--- /dev/null
+++ b/debian.source.txt
@@ -0,0 +1,8 @@
+deb http://mirrors.163.com/debian/ buster main non-free contrib
+deb http://mirrors.163.com/debian/ buster-updates main non-free contrib
+deb http://mirrors.163.com/debian/ buster-backports main non-free contrib
+deb-src http://mirrors.163.com/debian/ buster main non-free contrib
+deb-src http://mirrors.163.com/debian/ buster-updates main non-free contrib
+deb-src http://mirrors.163.com/debian/ buster-backports main non-free contrib
+deb http://mirrors.163.com/debian-security/ buster/updates main non-free contrib
+deb-src http://mirrors.163.com/debian-security/ buster/updates main non-free contrib
diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml
index de4f5dac..ca8be9ca 100644
--- a/docker-compose-CeleryExecutor.yml
+++ b/docker-compose-CeleryExecutor.yml
@@ -16,7 +16,7 @@ services:
         #     - ./pgdata:/var/lib/postgresql/data/pgdata
 
     webserver:
-        image: puckel/docker-airflow:1.10.9
+        image: movingheart/docker-airflow:1.10.12
         restart: always
         depends_on:
             - postgres
@@ -43,7 +43,7 @@ services:
             retries: 3
 
     flower:
-        image: puckel/docker-airflow:1.10.9
+        image: movingheart/docker-airflow:1.10.12
         restart: always
         depends_on:
             - redis
@@ -55,7 +55,7 @@ services:
         command: flower
 
     scheduler:
-        image: puckel/docker-airflow:1.10.9
+        image: movingheart/docker-airflow:1.10.12
         restart: always
         depends_on:
             - webserver
@@ -74,7 +74,7 @@ services:
         command: scheduler
 
     worker:
-        image: puckel/docker-airflow:1.10.9
+        image: movingheart/docker-airflow:1.10.12
         restart: always
         depends_on:
             - scheduler
diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml
--- a/docker-compose-LocalExecutor.yml
+++ b/docker-compose-LocalExecutor.yml
@@ -12,7 +12,7 @@ services:
                 max-file: "3"
 
     webserver:
-        image: puckel/docker-airflow:1.10.9
+        image: movingheart/docker-airflow:1.10.12
         restart: always
         depends_on:
             - postgres
diff --git a/script/entrypoint.sh b/script/entrypoint.sh
--- a/script/entrypoint.sh
+++ b/script/entrypoint.sh
@@ -110,6 +110,15 @@ fi
 case "$1" in
   webserver)
     airflow initdb
+    # config/airflow.cfg now sets rbac = True (FAB-based web UI), so an Admin
+    # account is created for it here. Override the credentials with the
+    # AIRFLOW_ADMIN_* variables; the fallbacks are the previously hard-coded values.
+    airflow create_user -r Admin \
+      -u "${AIRFLOW_ADMIN_USER:-airflow}" \
+      -e "${AIRFLOW_ADMIN_EMAIL:-airflow@example.com}" \
+      -f "${AIRFLOW_ADMIN_FIRSTNAME:-air}" \
+      -l "${AIRFLOW_ADMIN_LASTNAME:-flow}" \
+      -p "${AIRFLOW_ADMIN_PASSWORD:-airflow123}"
     if [ "$AIRFLOW__CORE__EXECUTOR" = "LocalExecutor" ] || [ "$AIRFLOW__CORE__EXECUTOR" = "SequentialExecutor" ]; then
       # With the "Local" and "Sequential" executors it should all run in one container.
       airflow scheduler &