Skip to content

Commit 3798e58

Browse files
Update remote pygeoapi (#60)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 9a51c5d commit 3798e58

4 files changed

Lines changed: 189 additions & 74 deletions

File tree

pygeoapi_plugins/process/intersect.py

Lines changed: 110 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -30,21 +30,28 @@
3030
import logging
3131
import tempfile
3232
from osgeo import gdal, ogr
33-
from typing import Tuple
33+
from typing import Tuple, Any
3434
from zipfile import ZipFile
3535
from pyproj import CRS
3636

3737
from pygeoapi.crs import transform_bbox
3838
from pygeoapi.config import get_config
3939
from pygeoapi.plugin import load_plugin
4040
from pygeoapi.process.base import BaseProcessor, ProcessorExecuteError
41+
from pygeoapi.provider import get_provider_by_type, filter_providers_by_type
4142
from pygeoapi.provider.ogr import GdalErrorHandler
42-
from pygeoapi.util import filter_dict_by_key_value, get_provider_default, to_json
43+
from pygeoapi.util import filter_dict_by_key_value, to_json
4344

4445
LOGGER = logging.getLogger(__name__)
4546

4647
CONFIG = get_config()
4748
COLLECTIONS = filter_dict_by_key_value(CONFIG['resources'], 'type', 'collection')
49+
FEATURE_COLLECTIONS = [
50+
cid
51+
for cid, cdef in COLLECTIONS.items()
52+
if filter_providers_by_type(cdef['providers'], 'feature')
53+
]
54+
FIRST_COLLECTION = next(iter(FEATURE_COLLECTIONS))
4855

4956
PROCESS_DEF = CONFIG['resources']['intersector']
5057
PROCESS_DEF.update(
@@ -85,8 +92,8 @@
8592
'keywords': {'en': ['OGC API', 'collection']},
8693
'schema': {
8794
'type': 'string',
88-
'example': next(iter(COLLECTIONS)),
89-
'enum': list(COLLECTIONS),
95+
'example': FIRST_COLLECTION,
96+
'enum': FEATURE_COLLECTIONS,
9097
},
9198
'minOccurs': 1,
9299
'maxOccurs': 1,
@@ -106,7 +113,7 @@
106113
'example': {
107114
'inputs': {
108115
'url': 'https://demo.pygeoapi.io/master/collections/obs/items/238', # noqa
109-
'collection': next(iter(COLLECTIONS)),
116+
'collection': FIRST_COLLECTION,
110117
}
111118
},
112119
}
@@ -145,99 +152,126 @@ def execute(self, data, outputs=None):
145152
:returns: 'application/json'
146153
"""
147154
mimetype = 'application/json'
155+
fc_out = {'type': 'FeatureCollection', 'features': []}
148156

149-
if not data.get('url') and not data.get('file'):
150-
raise ProcessorExecuteError('Invalid input, no Feature to intersect')
151-
152-
if data.get('url') and data.get('file'):
153-
raise ProcessorExecuteError(
154-
'Invalid input, provide either url or feature, not both'
155-
)
157+
# Validate input and get feature layer
158+
collection = self.validate_inputs(data)
159+
layer, bbox = self.get_layer(url=data.get('url'), file=data.get('file'))
156160

157-
collection = data.get('collection')
158-
if not collection:
159-
raise ProcessorExecuteError('Invalid input: no collection specified')
160-
elif collection not in COLLECTIONS:
161-
raise ProcessorExecuteError(
162-
f'Invalid input: collection {collection} not found'
163-
)
164-
165-
layer, bbox = self.get_layer(**data)
166-
provider_def = get_provider_default(
167-
CONFIG['resources'][collection]['providers']
161+
# Validate input and get provider definition for collection
162+
provider_def = get_provider_by_type(
163+
CONFIG['resources'][collection]['providers'], 'feature'
168164
)
169165
p = load_plugin('provider', provider_def)
170-
hits = p.query(bbox=bbox, resulttype='hits')['numberMatched']
171166

167+
# Fetch features using bbox of input data
168+
result = p.query(bbox=bbox, resulttype='hits')
169+
# Try to determine number of features to fetch
170+
# if the provider supports it, otherwise default to
171+
# arbitrary large number to fetch all features in bbox
172+
hits = result.get('numberMatched', 100000)
173+
# Handle no features found
172174
if hits == 0:
173-
msg = 'No features found in collection for provided feature bbox'
174-
LOGGER.info(msg)
175-
outputs = {
176-
'type': 'FeatureCollection',
177-
'features': [],
178-
'numberMatched': 0,
179-
'numberReturned': 0,
180-
}
181-
LOGGER.debug('Returning response')
182-
return mimetype, outputs
175+
fc_out['numberReturned'] = hits
176+
LOGGER.info('No features found in collection for provided bbox')
177+
return mimetype, fc_out
183178

179+
# Fetch features and insert into output FeatureCollection
180+
# if they intersect with provided geometry
184181
features = p.query(bbox=bbox, limit=hits)
185-
186-
out_features = []
187182
for feature in features['features']:
188183
geom = ogr.CreateGeometryFromJson(to_json(feature['geometry']))
189-
190184
if geom and geom.Intersects(layer):
191-
out_features.append(feature)
185+
fc_out['features'].append(feature)
186+
187+
# Add numberReturned to output and return
188+
true_hits = len(fc_out['features'])
189+
fc_out['numberReturned'] = true_hits
190+
return mimetype, fc_out
192191

193-
outputs = {
194-
'type': 'FeatureCollection',
195-
'features': out_features,
196-
'numberReturned': len(out_features),
197-
}
192+
def validate_inputs(self, data) -> str:
193+
"""
194+
Validate input data
198195
199-
return mimetype, outputs
196+
:param data: processor arguments
197+
198+
:returns: valid collection_id
199+
200+
:raises ProcessorExecuteError: if invalid input
201+
"""
202+
collection = data.get('collection')
203+
204+
if not data.get('url') and not data.get('file'):
205+
msg = 'Invalid input, provide either url or feature'
206+
raise ProcessorExecuteError(msg)
207+
208+
if data.get('url') and data.get('file'):
209+
msg = 'Invalid input, provide either url or feature, not both'
210+
raise ProcessorExecuteError(msg)
211+
212+
if not collection:
213+
msg = 'Invalid input: no collection specified'
214+
raise ProcessorExecuteError(msg)
215+
216+
if collection not in FEATURE_COLLECTIONS:
217+
msg = f'Invalid input: collection {collection} not found'
218+
raise ProcessorExecuteError(msg)
219+
220+
return collection
200221

201222
def get_layer(
202-
self, url: str = None, file: bytes = None, with_bbox=False, **kwargs
203-
) -> Tuple[ogr.Geometry, Tuple[float]]:
223+
self, url: str | None = None, file: Any = None
224+
) -> Tuple[ogr.Geometry, list[float]]:
204225
"""
205-
Private Function: Load feature WKY from URL or bytes of OGR
226+
Private Function: Load feature WKT from URL or bytes of OGR
206227
like file.
207228
208229
:param url: URL of feature
209-
:param feature: feature as string
230+
:param feature: feature as string or byte string
210231
211232
:returns: feature as OGC Layer and WGS84 bbox
212233
"""
234+
213235
if url:
236+
# Handle zip and remote files
214237
if url.startswith('http') and url.endswith('.zip'):
215238
url = f'/vsizip//vsicurl/{url}'
216239

217-
ds = gdal.OpenEx(url, gdal.OF_VECTOR)
218-
if ds is None:
240+
# Read feature from URL with GDAL
241+
try:
242+
ds = gdal.OpenEx(url, gdal.OF_VECTOR)
243+
assert ds is not None, 'GDAL could not open feature from URL.'
244+
except (RuntimeError, AssertionError):
219245
raise ProcessorExecuteError('GDAL could not open feature.')
220246

221247
layer = ds.GetLayer()
222248
srs = layer.GetSpatialRef()
249+
223250
elif file:
224-
if not isinstance(file, bytes):
251+
# Normalize file input to bytes
252+
if isinstance(file, str):
225253
file = bytes(file, 'utf-8')
254+
elif isinstance(file, dict):
255+
file = bytes(to_json(file), 'utf-8')
256+
elif not isinstance(file, bytes):
257+
raise ProcessorExecuteError('Invalid file input')
226258

227-
# Use /vsistdin/ to read bytes in-memory
259+
# Guess file type in-memory
260+
file_suffix = ''
228261
if file[:2] == b'PK':
229-
ext = '.zip'
262+
file_suffix = '.zip'
230263
elif file[:4] == b'PAR1':
231-
ext = '.parquet'
232-
else:
233-
ext = ''
264+
file_suffix = '.parquet'
234265

235-
with tempfile.NamedTemporaryFile(suffix=ext) as tmp:
266+
# Write to temporary file to allow for use of
267+
# GDAL virtual file systems (e.g. /vsizip/)
268+
with tempfile.NamedTemporaryFile(suffix=file_suffix) as tmp:
236269
tmp.write(file)
237270
tmp.flush()
238271
tmp_path = tmp.name
239272

240-
if ext == '.zip':
273+
# Handle zipped files using /vsizip/
274+
if file_suffix == '.zip':
241275
with ZipFile(tmp_path, 'r') as zip_ref:
242276
[filename] = [
243277
f
@@ -246,25 +280,34 @@ def get_layer(
246280
]
247281
tmp_path = f'/vsizip/{tmp_path}/{filename}'
248282

249-
ds = gdal.OpenEx(tmp_path, gdal.OF_VECTOR)
250-
if ds is None:
251-
raise ProcessorExecuteError(
252-
'GDAL could not open feature from bytes (temp file fallback).'
253-
) # noqa
254-
283+
# Attempt to open file with GDAL
284+
try:
285+
ds = gdal.OpenEx(tmp_path, gdal.OF_VECTOR)
286+
msg = f'GDAL could not open feature from file {tmp_path}.'
287+
assert ds is not None, msg
288+
except (RuntimeError, AssertionError):
289+
msg = 'GDAL could not open feature.'
290+
raise ProcessorExecuteError(msg)
291+
292+
# Read layer to ensure it persists
293+
# after temporary file is cleaned up
255294
layer = ds.GetLayer()
256295
srs = layer.GetSpatialRef()
257296
else:
258297
raise ProcessorExecuteError('No input provided.')
259298

299+
# Create union of all features in one geometry
260300
union = ogr.Geometry(ogr.wkbGeometryCollection)
261301
for feat in layer:
262302
geom = feat.GetGeometryRef()
263-
union.AddGeometry(geom)
303+
try:
304+
union.AddGeometry(geom)
305+
except (ValueError, TypeError):
306+
raise ProcessorExecuteError('Invalid geometry found.')
264307

308+
# Attempt to get bbox of layer
265309
(minx, maxx, miny, maxy) = layer.GetExtent()
266-
bbox = (minx, miny, maxx, maxy)
267-
310+
bbox = [minx, miny, maxx, maxy]
268311
if srs:
269312
bbox = transform_bbox(bbox, CRS(srs.ExportToWkt()), CRS('EPSG:4326'))
270313

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,4 @@ allow-direct-references = true
5454
exclude = ["tests"]
5555

5656
[tool.uv.sources]
57-
pygeoapi = { git = "https://github.com/geopython/pygeoapi.git" }
57+
pygeoapi = { git = "https://github.com/internetofwater/pygeoapi.git", branch="dev" }

tests/test_intersect_process.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,20 +104,58 @@ def test_execute_raises_when_missing_collection(process_def, bytes_data):
104104
(
105105
'https://github.com/geopython/pygeoapi/raw/refs/heads/master/tests/data/coads_sst.nc',
106106
None,
107-
pytest.raises(Exception),
107+
pytest.raises(ProcessorExecuteError),
108108
), # Error case - non-vector
109-
('https://example.com', None, pytest.raises(Exception)), # Error case - bad URL
109+
(
110+
'http://example.com',
111+
None,
112+
pytest.raises(ProcessorExecuteError),
113+
), # Error case - non-geospatial URL
114+
(
115+
'https://reference.geoconnex.us/ref',
116+
None,
117+
pytest.raises(ProcessorExecuteError),
118+
), # Error case - 404 URL
110119
],
111120
)
112121
def test_get_bbox(process_def, url, bounds, ctx):
113122
proc = intersect.IntersectionProcessor(process_def)
114123
with ctx:
115-
_, bbox = proc.get_layer(url=url, as_bbox=True)
124+
_, bbox = proc.get_layer(url=url)
125+
print(bbox)
116126
assert pytest.approx(bbox) == bounds
117127

118128
with ctx:
119129
content = requests.get(url).content
120-
_, bbox = proc.get_layer(file=content, as_bbox=True)
130+
_, bbox = proc.get_layer(file=content)
131+
assert pytest.approx(bbox) == bounds
132+
133+
134+
@pytest.mark.parametrize(
135+
'url,bounds,ctx',
136+
[
137+
(
138+
'http://geoconnex.us/ref/states/08',
139+
[-109.060253, 36.992426, -102.041524, 41.003443999999995],
140+
contextlib.nullcontext(),
141+
),
142+
(
143+
'https://reference.geoconnex.us/collections/states/items',
144+
[-179.148909, -14.548699, 179.77847011250077, 71.365162],
145+
contextlib.nullcontext(),
146+
),
147+
(
148+
'https://reference.geoconnex.us',
149+
None,
150+
pytest.raises(ProcessorExecuteError),
151+
),
152+
],
153+
)
154+
def test_literal_geojson(process_def, url, bounds, ctx):
155+
proc = intersect.IntersectionProcessor(process_def)
156+
content = requests.get(url).json()
157+
with ctx:
158+
_, bbox = proc.get_layer(file=content)
121159
assert pytest.approx(bbox) == bounds
122160

123161

0 commit comments

Comments
 (0)