diff --git a/bin/start-coordinator.sh b/bin/start-coordinator.sh index 2e790f8a02..f0245f51fd 100755 --- a/bin/start-coordinator.sh +++ b/bin/start-coordinator.sh @@ -47,17 +47,15 @@ done mkdir -p "${RSS_LOG_DIR}" mkdir -p "${RSS_PID_DIR}" -set +u -if [ $HADOOP_HOME ]; then +if [ -n "${HADOOP_HOME:-}" ]; then HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)" CLASSPATH=$CLASSPATH:$HADOOP_DEPENDENCY JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native" fi -if [ $HADOOP_CONF_DIR ]; then +if [ -n "${HADOOP_CONF_DIR:-}" ]; then CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR fi -set -u echo "class path is $CLASSPATH" diff --git a/bin/start-shuffle-server.sh b/bin/start-shuffle-server.sh index bc52c65eed..fb6a0b1eae 100755 --- a/bin/start-shuffle-server.sh +++ b/bin/start-shuffle-server.sh @@ -68,17 +68,15 @@ done mkdir -p "${RSS_LOG_DIR}" mkdir -p "${RSS_PID_DIR}" -set +u -if [ $HADOOP_HOME ]; then +if [ -n "${HADOOP_HOME:-}" ]; then HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)" CLASSPATH=$CLASSPATH:$HADOOP_DEPENDENCY JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native" fi -if [ "$HADOOP_CONF_DIR" ]; then +if [ -n "${HADOOP_CONF_DIR:-}" ]; then CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR fi -set -u echo "class path is $CLASSPATH" diff --git a/bin/utils.sh b/bin/utils.sh index 7398e2599f..de5bc2d852 100644 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -185,17 +185,15 @@ function load_rss_env { fi # export default value - set +o nounset - if [ -z "$HADOOP_CONF_DIR" ] && [ "$HADOOP_HOME" ]; then + if [ -z "${HADOOP_CONF_DIR:-}" ] && [ -n "${HADOOP_HOME:-}" ]; then HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop" fi - if [ -z "$RSS_LOG_DIR" ]; then + if [ -z "${RSS_LOG_DIR:-}" ]; then RSS_LOG_DIR="${RSS_HOME}/logs" fi - if [ -z "$RSS_PID_DIR" ]; then + if [ -z "${RSS_PID_DIR:-}" ]; then RSS_PID_DIR="${RSS_HOME}" fi - set -o nounset RUNNER="${JAVA_HOME}/bin/java" JPS="${JAVA_HOME}/bin/jps" @@ -208,14 +206,12 @@ function load_rss_env { echo "Using RSS 
from ${RSS_HOME}" echo "Using RSS conf from ${RSS_CONF_DIR}" - set +u - if [ $HADOOP_HOME ]; then + if [ -n "${HADOOP_HOME:-}" ]; then echo "Using Hadoop from ${HADOOP_HOME}" fi - if [ $HADOOP_CONF_DIR ]; then + if [ -n "${HADOOP_CONF_DIR:-}" ]; then echo "Using Hadoop conf from ${HADOOP_CONF_DIR}" fi - set -u echo "Write log file to ${RSS_LOG_DIR}" echo "Write pid file to ${RSS_PID_DIR}" diff --git a/deploy/kubernetes/docker/Dockerfile b/deploy/kubernetes/docker/Dockerfile index 21c0d62f0a..7ea3662faa 100644 --- a/deploy/kubernetes/docker/Dockerfile +++ b/deploy/kubernetes/docker/Dockerfile @@ -34,21 +34,12 @@ RUN mkdir -p /data/rssadmin/ RUN chown -R rssadmin:rssadmin /data USER rssadmin -COPY rss-${RSS_VERSION}-hadoop${HADOOP_SHORT_VERSION}.tgz /data/rssadmin -RUN tar -xvf /data/rssadmin/rss-${RSS_VERSION}-hadoop${HADOOP_SHORT_VERSION}.tgz -C /data/rssadmin -RUN mv /data/rssadmin/rss-${RSS_VERSION}-hadoop${HADOOP_SHORT_VERSION} /data/rssadmin/rss -RUN rm /data/rssadmin/rss/conf/rss-env.sh -RUN rm -rf /data/rssadmin/rss-${RSS_VERSION}-hadoop${HADOOP_SHORT_VERSION}.tgz +COPY rss.tgz /data/rssadmin +RUN tar -xvf /data/rssadmin/rss.tgz -C /data/rssadmin +RUN rm -rf /data/rssadmin/rss.tgz -COPY rss-env.sh /data/rssadmin/rss/conf - -COPY start.sh /data/rssadmin/rss/bin - -COPY hadoop-${HADOOP_VERSION}.tar.gz /data/rssadmin -RUN tar -zxvf /data/rssadmin/hadoop-${HADOOP_VERSION}.tar.gz -C /data/rssadmin -RUN mv /data/rssadmin/hadoop-${HADOOP_VERSION} /data/rssadmin/hadoop -RUN rm -rf /data/rssadmin/hadoop-${HADOOP_VERSION}.tar.gz -COPY hadoopconfig/ /data/rssadmin/hadoop/etc/hadoop +COPY --chown=rssadmin:rssadmin --chmod=744 rss-env.sh /data/rssadmin/rss/conf +COPY --chown=rssadmin:rssadmin --chmod=744 start.sh /data/rssadmin/rss/bin ENV RSS_VERSION ${RSS_VERSION} ENV HADOOP_VERSION ${HADOOP_VERSION} diff --git a/deploy/kubernetes/docker/build.sh b/deploy/kubernetes/docker/build.sh index 58478c3520..0a5478fbf1 100755 --- a/deploy/kubernetes/docker/build.sh +++ 
b/deploy/kubernetes/docker/build.sh @@ -26,17 +26,19 @@ function exit_with_usage() { echo "./build.sh - Tool for building docker images of Remote Shuffle Service" echo "" echo "Usage:" - echo "+------------------------------------------------------------------------------------------------------+" - echo "| ./build.sh [--hadoop-version ] [--registry ] [--author ] |" - echo "| [--base-os-distribution ] [--base-image ] |" - echo "| [--push-image ] [--apache-mirror ] |" - echo "+------------------------------------------------------------------------------------------------------+" + echo "+---------------------------------------------------------------------------------------+" + echo "| ./build.sh [--hadoop-version ] [--hadoop-provided ] |" + echo "| [--registry ] [--author ] |" + echo "| [--base-os-distribution ] [--base-image ] |" + echo "| [--push-image ] [--apache-mirror ] |" + echo "+---------------------------------------------------------------------------------------+" exit 1 } REGISTRY="docker.io/library" HADOOP_VERSION=2.8.5 HADOOP_SHORT_VERSION=$(echo $HADOOP_VERSION | awk -F "." '{print $1"."$2}') +HADOOP_PROVIDED="true" AUTHOR=$(whoami) # If you are based in China, you could pass --apache-mirror when building this. APACHE_MIRROR="https://dlcdn.apache.org" @@ -52,6 +54,11 @@ while (( "$#" )); do ;; --hadoop-version) HADOOP_VERSION="$2" + HADOOP_SHORT_VERSION=$(echo $HADOOP_VERSION | awk -F "." '{print $1"."$2}') + shift + ;; + --hadoop-provided) + HADOOP_PROVIDED="$2" shift ;; --author) @@ -101,14 +108,15 @@ else echo "using base image(${BASE_IMAGE}) to build rss server" fi - -HADOOP_FILE=hadoop-${HADOOP_VERSION}.tar.gz -ARCHIVE_HADOOP_URL=https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_FILE} -HADOOP_URL=${APACHE_MIRROR}/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_FILE} -echo "HADOOP_URL is either ${HADOOP_URL} or ${ARCHIVE_HADOOP_URL}" -if [ ! 
-e "$HADOOP_FILE" ]; \ - then wget "${HADOOP_URL}" || wget "$ARCHIVE_HADOOP_URL"; \ - else echo "${HADOOP_FILE} has been downloaded"; \ +if [ "$HADOOP_PROVIDED" == "true" ]; then + HADOOP_FILE=hadoop-${HADOOP_VERSION}.tar.gz + ARCHIVE_HADOOP_URL=https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_FILE} + HADOOP_URL=${APACHE_MIRROR}/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_FILE} + echo "HADOOP_URL is either ${HADOOP_URL} or ${ARCHIVE_HADOOP_URL}" + if [ ! -e "$HADOOP_FILE" ]; \ + then wget "${HADOOP_URL}" || wget "$ARCHIVE_HADOOP_URL"; \ + else echo "${HADOOP_FILE} has been downloaded"; \ + fi fi RSS_DIR=../../.. @@ -117,13 +125,40 @@ RSS_VERSION=$(./mvnw help:evaluate -Dexpression=project.version 2>/dev/null | gr RSS_FILE=rss-${RSS_VERSION}-hadoop${HADOOP_SHORT_VERSION}.tgz echo "RSS_VERSION: $RSS_VERSION" echo "RSS_FILE: $RSS_FILE" -if [ ! -e "$RSS_FILE" ]; \ - then bash ./build_distribution.sh; \ - else echo "$RSS_FILE has been built"; \ +if [ ! -e "$RSS_FILE" ]; then + if [ "$HADOOP_PROVIDED" == "true" ]; then + if [ "$HADOOP_SHORT_VERSION" == "3.2" ]; then + HADOOP_PROFILE="-Phadoop-dependencies-provided -Pnetty-4.1.68.Final" + else + HADOOP_PROFILE="-Phadoop-dependencies-provided" + fi + else + HADOOP_PROFILE="-Phadoop-dependencies-included" + fi + bash ./build_distribution.sh --hadoop-profile hadoop${HADOOP_SHORT_VERSION} ${HADOOP_PROFILE:-} +else + echo "$RSS_FILE has been built" fi cd "$OLDPWD" || exit cp "$RSS_DIR/$RSS_FILE" . +# prepare rss.tgz, which will become the content of /data/rssadmin +if [ ! 
-e rss.tgz ]; then + rm -rf tmp; mkdir -p tmp; cd tmp + tar -xzf "../$RSS_FILE" + mv "${RSS_FILE/%.tgz/}" rss + + # add hadoop binaries to tgz + if [ "$HADOOP_PROVIDED" == "true" ]; then + tar -xzf "../hadoop-${HADOOP_VERSION}.tar.gz" + mv "hadoop-${HADOOP_VERSION}" hadoop + cp -r ../hadoopconfig/ hadoop/etc/hadoop + fi + tar -czf ../rss.tgz * + cd "$OLDPWD" || exit +fi + + GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD) GIT_COMMIT=$(git describe --dirty --always --tags | sed 's/-/./g') echo "image version: ${IMAGE_VERSION:=$RSS_VERSION-$GIT_COMMIT}" @@ -133,6 +168,7 @@ docker build --network=host -t "$IMAGE" \ --build-arg RSS_VERSION="$RSS_VERSION" \ --build-arg HADOOP_VERSION="$HADOOP_VERSION" \ --build-arg HADOOP_SHORT_VERSION="$HADOOP_SHORT_VERSION" \ + --build-arg HADOOP_PROVIDED="$HADOOP_PROVIDED" \ --build-arg AUTHOR="$AUTHOR" \ --build-arg GIT_COMMIT="$GIT_COMMIT" \ --build-arg GIT_BRANCH="$GIT_BRANCH" \ diff --git a/deploy/kubernetes/docker/rss-env.sh b/deploy/kubernetes/docker/rss-env.sh index d6b0dfc48c..72a5cd7aa9 100644 --- a/deploy/kubernetes/docker/rss-env.sh +++ b/deploy/kubernetes/docker/rss-env.sh @@ -21,7 +21,10 @@ set -o pipefail set -o nounset # exit the script if you try to use an uninitialised variable set -o errexit # exit the script if any statement returns a non-true return value -HADOOP_HOME="/data/rssadmin/hadoop" +if [ -d "/data/rssadmin/hadoop" ]; then + export HADOOP_HOME="/data/rssadmin/hadoop" +fi + RUNNER="${JAVA_HOME}/bin/java" JPS="${JAVA_HOME}/bin/jps" diff --git a/docs/operator/install.md b/docs/operator/install.md index ddb0eb1278..467d3ecdf9 100644 --- a/docs/operator/install.md +++ b/docs/operator/install.md @@ -34,6 +34,10 @@ Run the following command: cd deploy/kubernetes/docker && sh build.sh --registry ${our-registry} ``` +This compiles RSS with Hadoop 2.8 support and adds the Hadoop binaries to the Docker image. +Use `--hadoop-version x.y.z` to choose a different Hadoop version. 
Use `--hadoop-provided false` to **not** +include the Hadoop installation in the image. + ## Creating or Updating CRD We can refer diff --git a/pom.xml b/pom.xml index e88c442568..b8e8127718 100644 --- a/pom.xml +++ b/pom.xml @@ -403,6 +403,14 @@ org.slf4j slf4j-log4j12 + + javax.xml.bind + jaxb-api + + + com.fasterxml.jackson.core + jackson-annotations + @@ -523,6 +531,14 @@ org.slf4j slf4j-log4j12 + + javax.xml.bind + jaxb-api + + + com.fasterxml.jackson.core + jackson-annotations + @@ -656,6 +672,10 @@ org.slf4j slf4j-log4j12 + + javax.xml.bind + jaxb-api + @@ -2293,6 +2313,12 @@ deploy/kubernetes + + hadoop-dependencies-provided + + provided + + hadoop-dependencies-included @@ -2329,6 +2355,15 @@ + + netty-4.1.68.Final + + false + + + 4.1.68.Final + + apache-release