Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 9 additions & 116 deletions overturemaps/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
specified bounding box in a few different file formats.

"""

import json
import os
import sys
Expand All @@ -15,31 +16,10 @@
import pyarrow.dataset as ds
import pyarrow.compute as pc
import pyarrow.fs as fs
import pyarrow.parquet as pq
import shapely.wkb

from . core import record_batch_reader, get_all_overture_types


def get_writer(output_format, path, schema):
    """Create the writer that matches ``output_format``.

    Args:
        output_format: One of ``"geojson"``, ``"geojsonseq"`` or
            ``"geoparquet"``.
        path: Output destination, passed straight to the chosen writer.
        schema: Schema of the batches that will be written. Only consulted
            for ``"geoparquet"``, where its metadata must contain a
            ``b"geo"`` entry holding the GeoParquet JSON metadata.

    Returns:
        A ``GeoJSONWriter``, ``GeoJSONSeqWriter`` or ``pq.ParquetWriter``.

    Raises:
        ValueError: If ``output_format`` is not one of the supported values.
    """
    if output_format == "geojson":
        return GeoJSONWriter(path)
    if output_format == "geojsonseq":
        return GeoJSONSeqWriter(path)
    if output_format == "geoparquet":
        # Update the geoparquet metadata to remove the file-level bbox which
        # will no longer apply to this file. Since we cannot write the field at
        # the end, just remove it as it's optional. Let the per-row bounding
        # boxes do all the work.
        metadata = schema.metadata
        geo = json.loads(metadata[b"geo"])
        # "columns" maps column name -> per-column metadata dict; the bbox
        # lives in the values, and may legitimately be absent.
        for column in geo["columns"].values():
            column.pop("bbox", None)
        metadata[b"geo"] = json.dumps(geo).encode("utf-8")
        return pq.ParquetWriter(path, schema.with_metadata(metadata))
    raise ValueError(f"Unsupported output format: {output_format!r}")


from geoarrow.rust.core import write_geojson, write_geojson_lines, write_parquet

from .core import record_batch_reader, get_all_overture_types


class BboxParamType(click.ParamType):
Expand Down Expand Up @@ -89,99 +69,12 @@ def download(bbox, output_format, output, type_):
if reader is None:
return

with get_writer(output_format, output, schema=reader.schema) as writer:
copy(reader, writer)


def copy(reader, writer):
    """Stream every non-empty record batch from ``reader`` into ``writer``.

    Args:
        reader: An iterable of record batches (each exposing ``num_rows``),
            such as a ``pyarrow.RecordBatchReader``.
        writer: Any object with a ``write_batch(batch)`` method.
    """
    # Iterating the reader lets the for-loop consume StopIteration itself,
    # instead of catching it by hand around read_next_batch().
    for batch in reader:
        if batch.num_rows > 0:
            writer.write_batch(batch)


class BaseGeoJSONWriter:
    """
    A base feature writer that manages either a file handle
    or output stream. Subclasses should implement write_feature()
    and finalize() if needed.

    Instances are context managers: leaving the ``with`` block calls
    close(), which runs finalize() and then closes any file this writer
    opened itself. Streams supplied by the caller are never closed here.
    """

    def __init__(self, where):
        """
        Args:
            where: A filesystem path (``str`` or ``os.PathLike``, with ``~``
                expanded) to open for writing, or an already-open object
                exposing ``write()``.
        """
        self.file_handle = None
        if isinstance(where, (str, os.PathLike)):
            # JSON text is written as UTF-8 regardless of the platform locale.
            self.file_handle = open(
                os.path.expanduser(where), "w", encoding="utf-8"
            )
            self.writer = self.file_handle
        else:
            self.writer = where
        self.is_open = True

    def __enter__(self):
        return self

    def __exit__(self, exc_type, value, traceback):
        self.close()

    def close(self):
        """Finalize the output and release the file handle; idempotent."""
        if not self.is_open:
            return
        try:
            self.finalize()
        finally:
            # Release the handle even when finalize() fails, so a broken
            # subclass cannot leak the open file.
            if self.file_handle:
                self.file_handle.close()
            self.is_open = False

    def write_batch(self, batch):
        """Convert each row of ``batch`` to a feature and write it."""
        if batch.num_rows == 0:
            return

        for row in batch.to_pylist():
            feature = self.row_to_feature(row)
            self.write_feature(feature)

    def write_feature(self, feature):
        """Write one feature dict; a no-op here, overridden by subclasses."""
        pass

    def finalize(self):
        """Emit any trailing output before closing; a no-op by default."""
        pass

    def row_to_feature(self, row):
        """
        Build a GeoJSON Feature dict from a row dict.

        The ``geometry`` value is decoded from WKB; a missing or null
        geometry yields ``"geometry": None``. The ``bbox`` column, if
        present, is dropped. ``row`` itself is left unmodified.
        """
        wkb = row.get("geometry")
        geometry = None
        if wkb is not None:
            geometry = shapely.wkb.loads(wkb).__geo_interface__

        # This only removes null values in the top-level dictionary but will
        # leave in nulls in sub-properties
        properties = {
            key: value
            for key, value in row.items()
            if key not in ("geometry", "bbox") and value is not None
        }
        return {
            "type": "Feature",
            "geometry": geometry,
            "properties": properties,
        }


class GeoJSONSeqWriter(BaseGeoJSONWriter):
    """Writes each feature as compact JSON on its own line."""

    def write_feature(self, feature):
        """Serialize ``feature`` without extra whitespace, then a newline."""
        line = json.dumps(feature, separators=(",", ":"))
        out = self.writer
        out.write(line)
        out.write("\n")


class GeoJSONWriter(BaseGeoJSONWriter):
    """
    Writes features as a single GeoJSON FeatureCollection document.

    The collection header is emitted on construction, features are
    comma-separated as they arrive, and finalize() closes the array and
    the enclosing object.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Whether a separator is required before the next feature.
        self._has_written_feature = False

        self.writer.write('{"type": "FeatureCollection", "features": [\n')

    def write_feature(self, feature):
        """Append ``feature`` to the collection as compact JSON."""
        out = self.writer
        if self._has_written_feature:
            out.write(",\n")
        encoded = json.dumps(feature, separators=(",", ":"))
        out.write(encoded)
        self._has_written_feature = True

    def finalize(self):
        """Close the features array and the FeatureCollection object."""
        self.writer.write("]}")
if output_format == "geojson":
write_geojson(reader, output)
elif output_format == "geojsonseq":
write_geojson_lines(reader, output)
elif output_format == "geoparquet":
write_parquet(reader, output)


if __name__ == "__main__":
Expand Down
74 changes: 16 additions & 58 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ readme = "README.md"
python = "^3.11"
click = "^8.1.7"
pyarrow = "^15.0.2"
shapely = "^2.0.3"
geoarrow-rust-core = "^0.2.0"

[tool.poetry.scripts]
overturemaps = 'overturemaps.cli:cli'
Expand Down