Skip to content

Commit 3520c53

Browse files
committed
Use geoarrow.rust.core for IO
Remove all Python IO in favor of GeoArrow Rust Python bindings
1 parent 4db526e commit 3520c53

3 files changed

Lines changed: 26 additions & 175 deletions

File tree

overturemaps/cli.py

Lines changed: 9 additions & 116 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
specified bounding box in a few different file formats.
66
77
"""
8+
89
import json
910
import os
1011
import sys
@@ -15,31 +16,10 @@
1516
import pyarrow.dataset as ds
1617
import pyarrow.compute as pc
1718
import pyarrow.fs as fs
18-
import pyarrow.parquet as pq
19-
import shapely.wkb
20-
21-
from . core import record_batch_reader, get_all_overture_types
22-
2319

24-
def get_writer(output_format, path, schema):
25-
if output_format == "geojson":
26-
writer = GeoJSONWriter(path)
27-
elif output_format == "geojsonseq":
28-
writer = GeoJSONSeqWriter(path)
29-
elif output_format == "geoparquet":
30-
# Update the geoparquet metadata to remove the file-level bbox which
31-
# will no longer apply to this file. Since we cannot write the field at
32-
# the end, just remove it as it's optional. Let the per-row bounding
33-
# boxes do all the work.
34-
metadata = schema.metadata
35-
geo = json.loads(metadata[b"geo"])
36-
for column in geo["columns"].keys():
37-
column.pop("bbox")
38-
metadata[b"geo"] = json.dumps(geo).encode("utf-8")
39-
schema = schema.with_metadata(metadata)
20+
from geoarrow.rust.core import write_geojson, write_geojson_lines, write_parquet
4021

41-
writer = pq.ParquetWriter(path, schema)
42-
return writer
22+
from .core import record_batch_reader, get_all_overture_types
4323

4424

4525
class BboxParamType(click.ParamType):
@@ -89,99 +69,12 @@ def download(bbox, output_format, output, type_):
8969
if reader is None:
9070
return
9171

92-
with get_writer(output_format, output, schema=reader.schema) as writer:
93-
copy(reader, writer)
94-
95-
96-
def copy(reader, writer):
97-
while True:
98-
try:
99-
batch = reader.read_next_batch()
100-
except StopIteration:
101-
break
102-
if batch.num_rows > 0:
103-
writer.write_batch(batch)
104-
105-
106-
class BaseGeoJSONWriter:
107-
"""
108-
A base feature writer that manages either a file handle
109-
or output stream. Subclasses should implement write_feature()
110-
and finalize() if needed
111-
"""
112-
113-
def __init__(self, where):
114-
self.file_handle = None
115-
if isinstance(where, str):
116-
self.file_handle = open(os.path.expanduser(where), "w")
117-
self.writer = self.file_handle
118-
else:
119-
self.writer = where
120-
self.is_open = True
121-
122-
def __enter__(self):
123-
return self
124-
125-
def __exit__(self, exc_type, value, traceback):
126-
self.close()
127-
128-
def close(self):
129-
if not self.is_open:
130-
return
131-
self.finalize()
132-
if self.file_handle:
133-
self.file_handle.close()
134-
self.is_open = False
135-
136-
def write_batch(self, batch):
137-
if batch.num_rows == 0:
138-
return
139-
140-
for row in batch.to_pylist():
141-
feature = self.row_to_feature(row)
142-
self.write_feature(feature)
143-
144-
def write_feature(self, feature):
145-
pass
146-
147-
def finalize(self):
148-
pass
149-
150-
def row_to_feature(self, row):
151-
geometry = shapely.wkb.loads(row.pop("geometry"))
152-
row.pop("bbox")
153-
154-
# This only removes null values in the top-level dictionary but will leave in
155-
# nulls in sub-properties
156-
properties = {k: v for k, v in row.items() if k != "bbox" and v is not None}
157-
return {
158-
"type": "Feature",
159-
"geometry": geometry.__geo_interface__,
160-
"properties": properties,
161-
}
162-
163-
164-
class GeoJSONSeqWriter(BaseGeoJSONWriter):
165-
def write_feature(self, feature):
166-
self.writer.write(json.dumps(feature, separators=(",", ":")))
167-
self.writer.write("\n")
168-
169-
170-
class GeoJSONWriter(BaseGeoJSONWriter):
171-
def __init__(self, *args, **kwargs):
172-
super().__init__(*args, **kwargs)
173-
self._has_written_feature = False
174-
175-
self.writer.write('{"type": "FeatureCollection", "features": [\n')
176-
177-
def write_feature(self, feature):
178-
if self._has_written_feature:
179-
self.writer.write(",\n")
180-
self.writer.write(json.dumps(feature, separators=(",", ":")))
181-
self._has_written_feature = True
182-
183-
def finalize(self):
184-
self.writer.write("]}")
72+
if output_format == "geojson":
73+
write_geojson(reader, output)
74+
elif output_format == "geojsonseq":
75+
write_geojson_lines(reader, output)
76+
elif output_format == "geoparquet":
77+
write_parquet(reader, output)
18578

18679

18780
if __name__ == "__main__":

poetry.lock

Lines changed: 16 additions & 58 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ readme = "README.md"
1010
python = "^3.11"
1111
click = "^8.1.7"
1212
pyarrow = "^15.0.2"
13-
shapely = "^2.0.3"
13+
geoarrow-rust-core = "^0.2.0"
1414

1515
[tool.poetry.scripts]
1616
overturemaps = 'overturemaps.cli:cli'

0 commit comments

Comments
 (0)