@@ -2502,9 +2502,14 @@ def spark() -> "SparkSession":
25022502 spark_version = "." .join (importlib .metadata .version ("pyspark" ).split ("." )[:2 ])
25032503 scala_version = "2.12"
25042504 iceberg_version = "1.9.0"
2505+ # These versions should match the ones used by Spark:
2506+ hadoop_version = "3.3.4"
2507+ aws_sdk_version = "1.12.753"
25052508
25062509 os .environ ["PYSPARK_SUBMIT_ARGS" ] = (
25072510 f"--packages org.apache.iceberg:iceberg-spark-runtime-{ spark_version } _{ scala_version } :{ iceberg_version } ,"
2511+ f"org.apache.hadoop:hadoop-aws:{ hadoop_version } ,"
2512+ f"com.amazonaws:aws-java-sdk-bundle:{ aws_sdk_version } ,"
25082513 f"org.apache.iceberg:iceberg-aws-bundle:{ iceberg_version } pyspark-shell"
25092514 )
25102515 os .environ ["AWS_REGION" ] = "us-east-1"
@@ -2518,6 +2523,8 @@ def spark() -> "SparkSession":
25182523 .config ("spark.sql.shuffle.partitions" , "1" )
25192524 .config ("spark.default.parallelism" , "1" )
25202525 .config ("spark.sql.extensions" , "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions" )
2526+ .config ("spark.hadoop.fs.s3a.endpoint" , "http://localhost:9000" )
2527+ .config ("spark.hadoop.fs.s3a.path.style.access" , "true" )
25212528 .config ("spark.sql.catalog.integration" , "org.apache.iceberg.spark.SparkCatalog" )
25222529 .config ("spark.sql.catalog.integration.catalog-impl" , "org.apache.iceberg.rest.RESTCatalog" )
25232530 .config ("spark.sql.catalog.integration.cache-enabled" , "false" )
@@ -2526,14 +2533,22 @@ def spark() -> "SparkSession":
25262533 .config ("spark.sql.catalog.integration.warehouse" , "s3://warehouse/wh/" )
25272534 .config ("spark.sql.catalog.integration.s3.endpoint" , "http://localhost:9000" )
25282535 .config ("spark.sql.catalog.integration.s3.path-style-access" , "true" )
2529- .config ("spark.sql.defaultCatalog" , "integration" )
25302536 .config ("spark.sql.catalog.hive" , "org.apache.iceberg.spark.SparkCatalog" )
25312537 .config ("spark.sql.catalog.hive.type" , "hive" )
25322538 .config ("spark.sql.catalog.hive.uri" , "http://localhost:9083" )
25332539 .config ("spark.sql.catalog.hive.io-impl" , "org.apache.iceberg.aws.s3.S3FileIO" )
25342540 .config ("spark.sql.catalog.hive.warehouse" , "s3://warehouse/hive/" )
25352541 .config ("spark.sql.catalog.hive.s3.endpoint" , "http://localhost:9000" )
25362542 .config ("spark.sql.catalog.hive.s3.path-style-access" , "true" )
2543+ .config ("spark.sql.catalog.spark_catalog" , "org.apache.iceberg.spark.SparkSessionCatalog" )
2544+ .config ("spark.sql.catalog.spark_catalog.type" , "hive" )
2545+ .config ("spark.sql.catalog.spark_catalog.uri" , "http://localhost:9083" )
2546+ .config ("spark.sql.catalog.spark_catalog.io-impl" , "org.apache.iceberg.aws.s3.S3FileIO" )
2547+ .config ("spark.sql.catalog.spark_catalog.warehouse" , "s3://warehouse/hive/" )
2548+ .config ("spark.sql.catalog.spark_catalog.s3.endpoint" , "http://localhost:9000" )
2549+ .config ("spark.sql.catalog.spark_catalog.s3.path-style-access" , "true" )
2550+ .config ("spark.sql.catalogImplementation" , "hive" )
2551+ .config ("spark.sql.defaultCatalog" , "integration" )
25372552 .config ("spark.sql.execution.arrow.pyspark.enabled" , "true" )
25382553 .getOrCreate ()
25392554 )
0 commit comments