File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 2222from pyiceberg .schema import Schema
2323from pyiceberg .types import FixedType , NestedField , UUIDType
2424
25- spark = SparkSession .builder .getOrCreate ()
25+ # This configuration is important: without it, we get many small
26+ # Parquet files with a single row each. When a positional delete
27+ # hits a Parquet file with only one row, the Parquet file gets
28+ # dropped instead of getting a merge-on-read delete file.
29+ spark = (
30+ SparkSession
31+ .builder
32+ .config ("spark.sql.shuffle.partitions" , "1" )
33+ .config ("spark.default.parallelism" , "1" )
34+ .getOrCreate ()
35+ )
2636
2737catalogs = {
2838 'rest' : load_catalog (
120130 """
121131 )
122132
123- # Partitioning is not really needed, but there is a bug:
124- # https://github.com/apache/iceberg/pull/7685
125- spark .sql (f"ALTER TABLE { catalog_name } .default.test_positional_mor_deletes ADD PARTITION FIELD years(dt) AS dt_years" )
126-
127133 spark .sql (
128134 f"""
129135 INSERT INTO { catalog_name } .default.test_positional_mor_deletes
168174 """
169175 )
170176
171- # Partitioning is not really needed, but there is a bug:
172- # https://github.com/apache/iceberg/pull/7685
173- spark .sql (f"ALTER TABLE { catalog_name } .default.test_positional_mor_double_deletes ADD PARTITION FIELD years(dt) AS dt_years" )
174-
175177 spark .sql (
176178 f"""
177179 INSERT INTO { catalog_name } .default.test_positional_mor_double_deletes
You can’t perform that action at this time.
0 commit comments