Skip to content

Commit c75901b

Browse files
committed
Add a covering bounding box to final parquet files.
1 parent 86952c6 commit c75901b

1 file changed

Lines changed: 6 additions & 2 deletions

File tree

src/openpois/io/geohash_partition.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,9 @@ def write_partitioned_dataset(
100 100
partition_dir = output_dir / f"geohash_prefix={prefix}"
101 101
partition_dir.mkdir()
102 102
group.sort_values("geohash_sort")[cols].to_parquet(
103-
partition_dir / "part-0.parquet"
103+
partition_dir / "part-0.parquet",
104+
write_covering_bbox = True,
105+
row_group_size = 100_000,
104 106
)
105 107
if (i + 1) % 100 == 0:
106 108
print(f" {i + 1}/{n_partitions} partitions written...")
@@ -218,7 +220,9 @@ def write_label_partitioned_dataset(
218 220
partition_dir = output_dir / f"{partition_col}={safe_value}"
219 221
partition_dir.mkdir()
220 222
group.sort_values(sort_col)[cols].to_parquet(
221-
partition_dir / "part-0.parquet"
223+
partition_dir / "part-0.parquet",
224+
write_covering_bbox = True,
225+
row_group_size = 100_000,
222 226
)
223 227
if (i + 1) % 25 == 0:
224 228
print(f" {i + 1}/{n_partitions} partitions written...")

0 commit comments

Comments
 (0)