File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -100,7 +100,9 @@ def write_partitioned_dataset(
100100 partition_dir = output_dir / f"geohash_prefix={ prefix } "
101101 partition_dir .mkdir ()
102102 group .sort_values ("geohash_sort" )[cols ].to_parquet (
103- partition_dir / "part-0.parquet"
103+ partition_dir / "part-0.parquet" ,
104+ write_covering_bbox = True ,
105+ row_group_size = 100_000 ,
104106 )
105107 if (i + 1 ) % 100 == 0 :
106108 print (f" { i + 1 } /{ n_partitions } partitions written..." )
@@ -218,7 +220,9 @@ def write_label_partitioned_dataset(
218220 partition_dir = output_dir / f"{ partition_col } ={ safe_value } "
219221 partition_dir .mkdir ()
220222 group .sort_values (sort_col )[cols ].to_parquet (
221- partition_dir / "part-0.parquet"
223+ partition_dir / "part-0.parquet" ,
224+ write_covering_bbox = True ,
225+ row_group_size = 100_000 ,
222226 )
223227 if (i + 1 ) % 25 == 0 :
224228 print (f" { i + 1 } /{ n_partitions } partitions written..." )
You can’t perform that action at this time.
0 commit comments