Skip to content

Commit d8939ac

Browse files
committed
try hive 4.0.1 with local pyiceberg
1 parent ad8263b commit d8939ac

3 files changed

Lines changed: 20 additions & 7 deletions

File tree

dev/Dockerfile

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ WORKDIR ${SPARK_HOME}
4040
ENV SPARK_VERSION=3.5.6
4141
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12
4242
ENV ICEBERG_VERSION=1.9.1
43-
ENV PYICEBERG_VERSION=0.9.1
4443

4544
RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
4645
&& tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
@@ -55,18 +54,30 @@ RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-
5554
RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
5655
-Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar
5756

58-
COPY spark-defaults.conf /opt/spark/conf
57+
COPY dev/spark-defaults.conf /opt/spark/conf
5958
ENV PATH="/opt/spark/sbin:/opt/spark/bin:${PATH}"
6059

6160
RUN chmod u+x /opt/spark/sbin/* && \
6261
chmod u+x /opt/spark/bin/*
6362

6463
RUN pip3 install -q ipython
6564

66-
RUN pip3 install "pyiceberg[s3fs,hive,pyarrow]==${PYICEBERG_VERSION}"
65+
# Copy the local pyiceberg source code and install locally
66+
COPY pyiceberg/ /tmp/pyiceberg/pyiceberg
67+
COPY pyproject.toml /tmp/pyiceberg/
68+
COPY build-module.py /tmp/pyiceberg/
69+
COPY vendor/ /tmp/pyiceberg/vendor
70+
COPY README.md /tmp/pyiceberg/
71+
COPY NOTICE /tmp/pyiceberg/
6772

68-
COPY entrypoint.sh .
69-
COPY provision.py .
73+
# Install pyiceberg from the copied source
74+
RUN cd /tmp/pyiceberg && pip3 install ".[s3fs,hive,pyarrow]"
75+
76+
# Clean up
77+
RUN rm -rf /tmp/pyiceberg
78+
79+
COPY dev/entrypoint.sh ${SPARK_HOME}/
80+
COPY dev/provision.py ${SPARK_HOME}/
7081

7182
ENTRYPOINT ["./entrypoint.sh"]
7283
CMD ["notebook"]

dev/docker-compose-integration.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ services:
1919
spark-iceberg:
2020
image: python-integration
2121
container_name: pyiceberg-spark
22-
build: .
22+
build:
23+
context: ..
24+
dockerfile: dev/Dockerfile
2325
networks:
2426
iceberg_net:
2527
depends_on:

dev/hive/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ ENV AWS_SDK_BUNDLE=1.12.753
2323
RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar -Lo /tmp/hadoop-aws-${HADOOP_VERSION}.jar
2424
RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_SDK_BUNDLE}/aws-java-sdk-bundle-${AWS_SDK_BUNDLE}.jar -Lo /tmp/aws-java-sdk-bundle-${AWS_SDK_BUNDLE}.jar
2525

26-
FROM apache/hive:4.0.0
26+
FROM apache/hive:4.0.1
2727

2828
ENV HADOOP_VERSION=3.3.6
2929
ENV AWS_SDK_BUNDLE=1.12.753

0 commit comments

Comments (0)