diff --git a/overturemaps/cli.py b/overturemaps/cli.py index cd10ab7..29e7e47 100644 --- a/overturemaps/cli.py +++ b/overturemaps/cli.py @@ -5,6 +5,7 @@ specified bounding box in a few different file formats. """ + import json import os import sys @@ -15,31 +16,10 @@ import pyarrow.dataset as ds import pyarrow.compute as pc import pyarrow.fs as fs -import pyarrow.parquet as pq -import shapely.wkb - -from . core import record_batch_reader, get_all_overture_types - -def get_writer(output_format, path, schema): - if output_format == "geojson": - writer = GeoJSONWriter(path) - elif output_format == "geojsonseq": - writer = GeoJSONSeqWriter(path) - elif output_format == "geoparquet": - # Update the geoparquet metadata to remove the file-level bbox which - # will no longer apply to this file. Since we cannot write the field at - # the end, just remove it as it's optional. Let the per-row bounding - # boxes do all the work. - metadata = schema.metadata - geo = json.loads(metadata[b"geo"]) - for column in geo["columns"].keys(): - column.pop("bbox") - metadata[b"geo"] = json.dumps(geo).encode("utf-8") - schema = schema.with_metadata(metadata) +from geoarrow.rust.core import write_geojson, write_geojson_lines, write_parquet - writer = pq.ParquetWriter(path, schema) - return writer +from .core import record_batch_reader, get_all_overture_types class BboxParamType(click.ParamType): @@ -89,99 +69,12 @@ def download(bbox, output_format, output, type_): if reader is None: return - with get_writer(output_format, output, schema=reader.schema) as writer: - copy(reader, writer) - - -def copy(reader, writer): - while True: - try: - batch = reader.read_next_batch() - except StopIteration: - break - if batch.num_rows > 0: - writer.write_batch(batch) - - -class BaseGeoJSONWriter: - """ - A base feature writer that manages either a file handle - or output stream. Subclasses should implement write_feature() - and finalize() if needed - """ - - def __init__(self, where): - self.file_handle = None - if isinstance(where, str): - self.file_handle = open(os.path.expanduser(where), "w") - self.writer = self.file_handle - else: - self.writer = where - self.is_open = True - - def __enter__(self): - return self - - def __exit__(self, exc_type, value, traceback): - self.close() - - def close(self): - if not self.is_open: - return - self.finalize() - if self.file_handle: - self.file_handle.close() - self.is_open = False - - def write_batch(self, batch): - if batch.num_rows == 0: - return - - for row in batch.to_pylist(): - feature = self.row_to_feature(row) - self.write_feature(feature) - - def write_feature(self, feature): - pass - - def finalize(self): - pass - - def row_to_feature(self, row): - geometry = shapely.wkb.loads(row.pop("geometry")) - row.pop("bbox") - - # This only removes null values in the top-level dictionary but will leave in - # nulls in sub-properties - properties = {k: v for k, v in row.items() if k != "bbox" and v is not None} - return { - "type": "Feature", - "geometry": geometry.__geo_interface__, - "properties": properties, - } - - -class GeoJSONSeqWriter(BaseGeoJSONWriter): - def write_feature(self, feature): - self.writer.write(json.dumps(feature, separators=(",", ":"))) - self.writer.write("\n") - - -class GeoJSONWriter(BaseGeoJSONWriter): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._has_written_feature = False - - self.writer.write('{"type": "FeatureCollection", "features": [\n') - - def write_feature(self, feature): - if self._has_written_feature: - self.writer.write(",\n") - self.writer.write(json.dumps(feature, separators=(",", ":"))) - self._has_written_feature = True - - def finalize(self): - self.writer.write("]}") + if output_format == "geojson": + write_geojson(reader, output) + elif output_format == "geojsonseq": + write_geojson_lines(reader, output) + elif output_format == "geoparquet": + write_parquet(reader, output) if __name__ == "__main__": diff --git a/poetry.lock b/poetry.lock index 0732be7..3d7884a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -25,6 +25,21 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "geoarrow-rust-core" +version = "0.2.0" +description = "Efficient, vectorized geospatial operations in Python." +optional = false +python-versions = ">=3.8" +files = [ + {file = "geoarrow_rust_core-0.2.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:1b16edae465bd39a274918a42f052ec8ff279650d45e48923b2f05eb52480fd1"}, + {file = "geoarrow_rust_core-0.2.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:68f0be67f362dcb97bc3ef48a741a2d3549a311e4489e8df38ef9efc23dd513c"}, + {file = "geoarrow_rust_core-0.2.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:824e184a3d5e29f652ef024fc34bcebc8aea8ae454d9999ed65aa1f2551ed3c1"}, + {file = "geoarrow_rust_core-0.2.0-cp38-abi3-win32.whl", hash = "sha256:834eb61ae91aa6bdf402f4e924b1019526661d5e7d7efefff153c9c86a2272c1"}, + {file = "geoarrow_rust_core-0.2.0-cp38-abi3-win_amd64.whl", hash = "sha256:80e2c773235e50e2bee1819c7b44cf3d298c37e334c47111ed58345804f09319"}, + {file = "geoarrow_rust_core-0.2.0.tar.gz", hash = "sha256:4f293c7e36ff2206d3e7dd96342897c1b2634618c2cfb9175574ef2705ea825f"}, +] + [[package]] name = "numpy" version = "1.26.4" @@ -118,64 +133,7 @@ files = [ [package.dependencies] numpy = ">=1.16.6,<2" -[[package]] -name = "shapely" -version = "2.0.3" -description = "Manipulation and analysis of geometric objects" -optional = false -python-versions = ">=3.7" -files = [ - {file = "shapely-2.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:af7e9abe180b189431b0f490638281b43b84a33a960620e6b2e8d3e3458b61a1"}, - {file = "shapely-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:98040462b36ced9671e266b95c326b97f41290d9d17504a1ee4dc313a7667b9c"}, - {file = "shapely-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:71eb736ef2843f23473c6e37f6180f90f0a35d740ab284321548edf4e55d9a52"}, - {file = "shapely-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:881eb9dbbb4a6419667e91fcb20313bfc1e67f53dbb392c6840ff04793571ed1"}, - {file = "shapely-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f10d2ccf0554fc0e39fad5886c839e47e207f99fdf09547bc687a2330efda35b"}, - {file = "shapely-2.0.3-cp310-cp310-win32.whl", hash = "sha256:6dfdc077a6fcaf74d3eab23a1ace5abc50c8bce56ac7747d25eab582c5a2990e"}, - {file = "shapely-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:64c5013dacd2d81b3bb12672098a0b2795c1bf8190cfc2980e380f5ef9d9e4d9"}, - {file = "shapely-2.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:56cee3e4e8159d6f2ce32e421445b8e23154fd02a0ac271d6a6c0b266a8e3cce"}, - {file = "shapely-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:619232c8276fded09527d2a9fd91a7885ff95c0ff9ecd5e3cb1e34fbb676e2ae"}, - {file = "shapely-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b2a7d256db6f5b4b407dc0c98dd1b2fcf1c9c5814af9416e5498d0a2e4307a4b"}, - {file = "shapely-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e45f0c8cd4583647db3216d965d49363e6548c300c23fd7e57ce17a03f824034"}, - {file = "shapely-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13cb37d3826972a82748a450328fe02a931dcaed10e69a4d83cc20ba021bc85f"}, - {file = "shapely-2.0.3-cp311-cp311-win32.whl", hash = "sha256:9302d7011e3e376d25acd30d2d9e70d315d93f03cc748784af19b00988fc30b1"}, - {file = "shapely-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6b464f2666b13902835f201f50e835f2f153f37741db88f68c7f3b932d3505fa"}, - {file = "shapely-2.0.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e86e7cb8e331a4850e0c2a8b2d66dc08d7a7b301b8d1d34a13060e3a5b4b3b55"}, - {file = "shapely-2.0.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c91981c99ade980fc49e41a544629751a0ccd769f39794ae913e53b07b2f78b9"}, - {file = "shapely-2.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd45d456983dc60a42c4db437496d3f08a4201fbf662b69779f535eb969660af"}, - {file = "shapely-2.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:882fb1ffc7577e88c1194f4f1757e277dc484ba096a3b94844319873d14b0f2d"}, - {file = "shapely-2.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9f2d93bff2ea52fa93245798cddb479766a18510ea9b93a4fb9755c79474889"}, - {file = "shapely-2.0.3-cp312-cp312-win32.whl", hash = "sha256:99abad1fd1303b35d991703432c9481e3242b7b3a393c186cfb02373bf604004"}, - {file = "shapely-2.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:6f555fe3304a1f40398977789bc4fe3c28a11173196df9ece1e15c5bc75a48db"}, - {file = "shapely-2.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a983cc418c1fa160b7d797cfef0e0c9f8c6d5871e83eae2c5793fce6a837fad9"}, - {file = "shapely-2.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18bddb8c327f392189a8d5d6b9a858945722d0bb95ccbd6a077b8e8fc4c7890d"}, - {file = "shapely-2.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:442f4dcf1eb58c5a4e3428d88e988ae153f97ab69a9f24e07bf4af8038536325"}, - {file = "shapely-2.0.3-cp37-cp37m-win32.whl", hash = "sha256:31a40b6e3ab00a4fd3a1d44efb2482278642572b8e0451abdc8e0634b787173e"}, - {file = "shapely-2.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:59b16976c2473fec85ce65cc9239bef97d4205ab3acead4e6cdcc72aee535679"}, - {file = "shapely-2.0.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:705efbce1950a31a55b1daa9c6ae1c34f1296de71ca8427974ec2f27d57554e3"}, - {file = "shapely-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:601c5c0058a6192df704cb889439f64994708563f57f99574798721e9777a44b"}, - {file = "shapely-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f24ecbb90a45c962b3b60d8d9a387272ed50dc010bfe605f1d16dfc94772d8a1"}, - {file = "shapely-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8c2a2989222c6062f7a0656e16276c01bb308bc7e5d999e54bf4e294ce62e76"}, - {file = "shapely-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42bceb9bceb3710a774ce04908fda0f28b291323da2688f928b3f213373b5aee"}, - {file = "shapely-2.0.3-cp38-cp38-win32.whl", hash = "sha256:54d925c9a311e4d109ec25f6a54a8bd92cc03481a34ae1a6a92c1fe6729b7e01"}, - {file = "shapely-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:300d203b480a4589adefff4c4af0b13919cd6d760ba3cbb1e56275210f96f654"}, - {file = "shapely-2.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:083d026e97b6c1f4a9bd2a9171c7692461092ed5375218170d91705550eecfd5"}, - {file = "shapely-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:27b6e1910094d93e9627f2664121e0e35613262fc037051680a08270f6058daf"}, - {file = "shapely-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:71b2de56a9e8c0e5920ae5ddb23b923490557ac50cb0b7fa752761bf4851acde"}, - {file = "shapely-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d279e56bbb68d218d63f3efc80c819cedcceef0e64efbf058a1df89dc57201b"}, - {file = "shapely-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88566d01a30f0453f7d038db46bc83ce125e38e47c5f6bfd4c9c287010e9bf74"}, - {file = "shapely-2.0.3-cp39-cp39-win32.whl", hash = "sha256:58afbba12c42c6ed44c4270bc0e22f3dadff5656d711b0ad335c315e02d04707"}, - {file = "shapely-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:5026b30433a70911979d390009261b8c4021ff87c7c3cbd825e62bb2ffa181bc"}, - {file = "shapely-2.0.3.tar.gz", hash = "sha256:4d65d0aa7910af71efa72fd6447e02a8e5dd44da81a983de9d736d6e6ccbe674"}, -] - -[package.dependencies] -numpy = ">=1.14,<2" - -[package.extras] -docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] -test = ["pytest", "pytest-cov"] - [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "0843c8d8603adeff503c2def332638e6049decc1456e088ee37fc0c150ce8431" +content-hash = "bf8a9ee3fe998d76e386bd80fc9d648a5669621b14d29a788f97a17ba16e9c4e" diff --git a/pyproject.toml b/pyproject.toml index 2769f06..b6d9cc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" python = "^3.11" click = "^8.1.7" pyarrow = "^15.0.2" -shapely = "^2.0.3" +geoarrow-rust-core = "^0.2.0" [tool.poetry.scripts] overturemaps = 'overturemaps.cli:cli'