Skip to content

Commit b4071eb

Browse files
add start_hdfs_fuse.sh
1 parent 26f1f76 commit b4071eb

2 files changed

Lines changed: 153 additions & 0 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ spark.dockerfile
1818
deps.dockerfile
1919
worker.dockerfile
2020
etl.dockerfile
21+
hdfs-fuse.dockerfile
2122
# we don't put binary file to git repo
2223
gradle-wrapper.jar
2324
VersionUtils.java

docker/start_hdfs_fuse.sh

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/bin/bash
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# Absolute path of the directory containing this script, independent of the
# caller's working directory.
declare -r DOCKER_FOLDER=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# Shared helpers (checkDocker, checkNetwork, buildImageIfNeed, ...).
# Quoted so the script still works when its path contains spaces.
source "$DOCKER_FOLDER/docker_build_common.sh"

# ===============================[global variables]===============================
# Hadoop distribution version to download/build (override with VERSION=...).
declare -r VERSION=${VERSION:-3.3.4}
# Docker repository for the image (override with REPO=...).
declare -r REPO=${REPO:-ghcr.io/skiptests/astraea/hdfs_fuse}
declare -r IMAGE_NAME="$REPO:$VERSION"
# Dockerfile is generated dynamically by generateDockerfile below.
declare -r DOCKERFILE=$DOCKER_FOLDER/hdfs-fuse.dockerfile
declare -r CONTAINER_NAME="hdfs-fuse"
26+
27+
# ===================================[functions]===================================
28+
29+
# Print usage information: the environment variables this script understands.
function showHelp() {
  printf '%s\n' \
    "Usage: [ENV] start_hdfs_fuse.sh" \
    "ENV: " \
    "    REPO=astraea/datanode    set the docker repo" \
    "    VERSION=3.3.4    set version of hadoop distribution" \
    "    BUILD=false    set true if you want to build image locally" \
    "    RUN=false    set false if you want to build/pull image only"
}
37+
38+
function generateDockerfile() {
39+
echo "#this dockerfile is generated dynamically
40+
FROM ubuntu:22.04 AS build
41+
42+
#install tools
43+
RUN apt-get update && apt-get install -y wget
44+
45+
#download hadoop
46+
WORKDIR /tmp
47+
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${VERSION}/hadoop-${VERSION}-src.tar.gz
48+
RUN mkdir /opt/hadoop-src
49+
RUN tar -zxvf hadoop-${VERSION}-src.tar.gz -C /opt/hadoop-src --strip-components=1
50+
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${VERSION}/hadoop-${VERSION}.tar.gz
51+
RUN mkdir /opt/hadoop
52+
RUN tar -zxvf hadoop-${VERSION}.tar.gz -C /opt/hadoop --strip-components=1
53+
54+
FROM ubuntu:22.04 AS buildsrc
55+
56+
#install tools
57+
RUN apt-get update \\
58+
&& apt-get install -y openjdk-11-jdk \\
59+
maven \\
60+
build-essential \\
61+
autoconf \\
62+
automake \\
63+
libtool \\
64+
cmake \\
65+
zlib1g-dev \\
66+
pkg-config \\
67+
libssl-dev \\
68+
libsasl2-dev \\
69+
g++ \\
70+
curl \\
71+
libfuse-dev
72+
73+
WORKDIR /tmp
74+
RUN curl -L -s -S https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz -o protobuf-3.7.1.tar.gz \\
75+
&& mkdir /opt/protobuf-3.7-src \\
76+
&& tar -zxf protobuf-3.7.1.tar.gz --strip-components 1 -C /opt/protobuf-3.7-src && cd /opt/protobuf-3.7-src \\
77+
&& ./configure --prefix=/usr/ \\
78+
&& make -j\$(nproc) \\
79+
&& make install
80+
81+
WORKDIR /tmp
82+
RUN curl -L https://sourceforge.net/projects/boost/files/boost/1.80.0/boost_1_80_0.tar.bz2/download > boost_1_80_0.tar.bz2 \\
83+
&& tar --bzip2 -xf boost_1_80_0.tar.bz2 -C /opt && cd /opt/boost_1_80_0 \\
84+
&& ./bootstrap.sh --prefix=/usr/ \\
85+
&& ./b2 --without-python \\
86+
&& ./b2 --without-python install
87+
88+
ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64
89+
90+
#copy hadoop
91+
COPY --from=build /opt/hadoop-src /opt/hadoop
92+
WORKDIR /opt/hadoop
93+
RUN mvn clean package -pl hadoop-hdfs-project/hadoop-hdfs-native-client -Pnative -DskipTests -Drequire.fuse=true
94+
95+
FROM ubuntu:22.04
96+
97+
#install tools
98+
RUN apt-get update && apt-get install -y openjdk-11-jre fuse
99+
100+
#copy hadoop
101+
COPY --from=build /opt/hadoop /opt/hadoop
102+
COPY --from=buildsrc /opt/hadoop /opt/hadoop
103+
104+
ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64
105+
ENV HADOOP_HOME /opt/hadoop
106+
107+
RUN echo \"user_allow_other\" >> /etc/fuse.conf
108+
109+
WORKDIR /opt/hadoop/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs
110+
RUN sed -i -e '18aexport CLASSPATH=\\\${HADOOP_HOME}/etc/hadoop:\`find \\\${HADOOP_HOME}/share/hadoop/ | awk '\"'\"'{path=path\":\"\\\$0}END{print path}'\"'\"'\`' \\
111+
-i -e '18aexport LD_LIBRARY_PATH=\\\${HADOOP_HOME}/lib/native:\\\$LD_LIBRARY_PATH' \\
112+
-i -e 's#export LIBHDFS_PATH=.*#export LIBHDFS_PATH=\\\${HADOOP_HOME}/hadoop-hdfs-project/hadoop-hdfs-native-client/target/native/target/usr/local/lib#' \\
113+
-i -e 's/find \"\\\$HADOOP_HOME\/hadoop-client\" -name \"\\*.jar\"/find \"\\\$HADOOP_HOME\/hadoop-client-modules\/hadoop-client\" -name \"\\*.jar\"/g' fuse_dfs_wrapper.sh
114+
115+
#add user
116+
RUN groupadd astraea && useradd -ms /bin/bash -g astraea astraea
117+
118+
RUN mkdir /mnt/hdfs
119+
120+
#change user
121+
RUN chown -R $USER:$USER /opt/hadoop /mnt/hdfs
122+
USER $USER
123+
124+
" >"$DOCKERFILE"
125+
}
126+
127+
# ===================================[main]===================================
128+
129+
checkDocker
buildImageIfNeed "$IMAGE_NAME"
# RUN=false means "build/pull only": report the image and stop here.
if [[ "$RUN" != "true" ]]; then
  echo "docker image: $IMAGE_NAME is created"
  exit 0
fi

checkNetwork

# First positional argument: the HDFS namenode address (e.g. hdfs://host:port).
if [[ $# -gt 0 ]]; then
  HDFS=$1
  # Last 5 characters of the address are taken as the port; it suffixes the
  # container name so several namenodes can be mounted concurrently.
  HDFS_PORT="${HDFS: -5}"
else
  # Without an address the wrapper below has nothing to mount and the
  # container name degenerates to "hdfs-fuse-" — fail early instead.
  showHelp
  echo "ERROR: hdfs address (e.g. hdfs://host:port) is required" >&2
  exit 2
fi

# /dev/fuse + SYS_ADMIN + apparmor:unconfined are required to mount FUSE
# filesystems inside the container.
docker run -d --init \
  --name "$CONTAINER_NAME-$HDFS_PORT" \
  --device /dev/fuse \
  --cap-add SYS_ADMIN \
  --security-opt apparmor:unconfined \
  "$IMAGE_NAME" /bin/bash -c "./fuse_dfs_wrapper.sh -d $HDFS /mnt/hdfs"

echo "================================================="
echo "run docker exec -it -w /mnt/hdfs $CONTAINER_NAME-$HDFS_PORT /bin/bash to access fuse"
echo "================================================="

0 commit comments

Comments
 (0)