diff --git a/dev/Dockerfile b/dev/Dockerfile index 5292e26421..f0539b15d2 100644 --- a/dev/Dockerfile +++ b/dev/Dockerfile @@ -39,20 +39,20 @@ WORKDIR ${SPARK_HOME} # Remember to also update `tests/conftest`'s spark setting ENV SPARK_VERSION=3.5.6 ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12 -ENV ICEBERG_VERSION=1.9.1 +ENV ICEBERG_VERSION=1.10.0 ENV PYICEBERG_VERSION=0.9.1 +ENV BASE_ARTIFACT_URL=https://repository.apache.org/content/repositories/orgapacheiceberg-1243 RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \ && tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \ && rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz # Download iceberg spark runtime -RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \ +RUN curl --retry 5 -s ${BASE_ARTIFACT_URL}/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \ -Lo /opt/spark/jars/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar - # Download AWS bundle -RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar \ +RUN curl --retry 5 -s ${BASE_ARTIFACT_URL}/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar \ -Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar COPY spark-defaults.conf /opt/spark/conf diff --git a/tests/conftest.py b/tests/conftest.py index 5aff45c1ed..b6364e5c68 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2534,7 +2534,7 @@ def spark() -> "SparkSession": # Remember to also update `dev/Dockerfile` spark_version = ".".join(importlib.metadata.version("pyspark").split(".")[:2]) scala_version = "2.12" - iceberg_version = "1.9.2" + iceberg_version = "1.10.0" hadoop_version = "3.3.4" aws_sdk_version = "1.12.753" @@ -2551,6 +2551,7 @@ def spark() -> "SparkSession": spark = ( SparkSession.builder.appName("PyIceberg integration test") + .config("spark.jars.repositories", "https://repository.apache.org/content/repositories/orgapacheiceberg-1243/") .config("spark.sql.session.timeZone", "UTC") .config("spark.sql.shuffle.partitions", "1") .config("spark.default.parallelism", "1")