Skip to content
This repository was archived by the owner on Feb 23, 2026. It is now read-only.

Commit 0b3b88f

Browse files
committed
✨ Cherry pick to add support STAC legacy and 1.0
1 parent f1a0f8b commit 0b3b88f

5 files changed

Lines changed: 223 additions & 15 deletions

File tree

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ ADD . ${CUBE_BUILDER_INSTALL_PATH}
3434

3535
WORKDIR ${CUBE_BUILDER_INSTALL_PATH}
3636

37-
RUN python3 -m pip install pip --upgrade setuptools wheel && \
37+
RUN python3 -m pip install pip --upgrade "setuptools<67" wheel && \
3838
python3 -m pip install -e .[rabbitmq] && \
3939
python3 -m pip install gunicorn
4040

INSTALL.rst

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,13 @@ Install in development mode:
8686
If you have problems with the ``librabbitmq`` installation, please, see [#f1]_.
8787

8888

89+
.. note::
90+
91+
The `setuptools v67+ <https://setuptools.pypa.io/en/latest/history.html>`_ has breaking changes related
92+
to pip version requirements. For now, you should install ``setuptools<67`` for compatibility.
93+
The packages in ``Cube-Builder`` will be upgraded to support the latest version.
94+
95+
8996
Running in Development Mode
9097
---------------------------
9198

@@ -197,12 +204,14 @@ You may need to replace the definition of some parameters:
197204
The command line ``cube-builder worker`` is an auxiliary tool that wraps celery command line
198205
using ``cube_builder`` as context. In this way, all ``celery worker`` parameters are currently supported.
199206
See more in `Celery Workers Guide <https://docs.celeryproject.org/en/stable/userguide/workers.html>`_.
207+
If you keep the parameters ``WORK_DIR`` and ``DATA_DIR``, make sure they are writable; otherwise,
208+
you may see issues related to ``Permission Denied``.
200209

201210

202211
.. warning::
203212

204213
The ``Cube Builder`` can use a lot of memory for each concurrent process, since it opens multiple images in memory.
205-
You can limit the concurrent processes in order to prevent it.
214+
You can limit the concurrent processes with ``--concurrency NUMBER`` in order to prevent it.
206215

207216

208217
.. rubric:: Footnotes
@@ -249,4 +258,4 @@ You may need to replace the definition of some parameters:
249258
250259
.. code-block:: shell
251260
252-
$ sudo apt install autoconf
261+
$ sudo apt install autoconf

cube_builder/_adapter.py

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
#
2+
# This file is part of Cube Builder.
3+
# Copyright (C) 2022 INPE.
4+
#
5+
# This program is free software: you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with this program. If not, see <https://www.gnu.org/licenses/gpl-3.0.html>.
17+
#
18+
19+
"""Define basic module to adapt Python libraries like STAC v1 and legacy versions."""
20+
21+
from abc import ABC, abstractmethod
22+
from copy import deepcopy
23+
from typing import List
24+
from urllib.parse import urljoin
25+
26+
import requests
27+
import shapely.geometry
28+
from pystac_client import Client
29+
from werkzeug.exceptions import abort
30+
31+
32+
class BaseSTAC(ABC):
    """Abstract interface for the STAC clients used to talk to a server.

    Subclasses provide ``search``, ``items``, ``collections`` and ``collection``.
    """

    uri: str
    """Represent URI for server."""
    headers: dict
    """Represent HTTP headers to be attached in requests."""
    params: dict
    """Represent HTTP parameters for requests."""

    def __init__(self, uri: str, params=None, headers=None, **kwargs):
        """Keep the server URI and the request options on the instance.

        Any extra keyword arguments are stored untouched in ``_options``.
        """
        self._options = kwargs
        self.headers = headers
        self.params = params
        self.uri = uri

    @abstractmethod
    def search(self, **parameters) -> dict:
        """Search for collection items on STAC server."""

    @abstractmethod
    def items(self, collection_id: str, **kwargs) -> dict:
        """Access STAC Collection Items."""

    @abstractmethod
    def collections(self) -> List[dict]:
        """Retrieve the collections from STAC."""

    @abstractmethod
    def collection(self, collection_id: str) -> dict:
        """Access STAC Collection."""

    @staticmethod
    def _items_result(features: List[dict], matched: int):
        """Wrap ``features`` in a dict carrying a ``context`` summary.

        ``context.returned`` is the number of features given and
        ``context.matched`` is the ``matched`` value as received.
        """
        context = {"returned": len(features), "matched": matched}
        result = {"context": context}
        result["features"] = features
        return result
74+
75+
76+
class STACV1(BaseSTAC):
    """STAC adapter for servers that implement STAC v1.0+.

    Requests are delegated to
    `pystac-client <https://pystac-client.readthedocs.io/en/latest/>`_.
    """

    def __init__(self, uri: str, params=None, headers=None, **kwargs):
        """Open a ``pystac_client.Client`` for ``uri`` and keep it on the instance."""
        super().__init__(uri, params, headers, **kwargs)

        self._instance = Client.open(uri, headers=headers, parameters=params, **kwargs)

    def search(self, limit=10, max_items=10, **parameters) -> dict:
        """Search for collection items on STAC server.

        Note:
            The ``max_items`` argument is not used: the value of ``limit`` is
            always sent as both ``limit`` and ``max_items``.
        """
        query = self._instance.search(limit=limit, max_items=limit, **parameters)

        features = [entry.to_dict() for entry in query.items()]
        total = query.matched()

        return self._items_result(features, matched=total)

    def collections(self) -> List[dict]:
        """Retrieve the collections from STAC as a list of dicts."""
        found = []
        for entry in self._instance.get_collections():
            found.append(entry.to_dict())
        return found

    def collection(self, collection_id: str) -> dict:
        """Access a STAC Collection and return it as a dict."""
        return self._instance.get_collection(collection_id).to_dict()

    def items(self, collection_id: str, **kwargs) -> dict:
        """Access STAC Collection Items.

        The features come from the collection itself, while the ``matched``
        count is taken from a one-item search on the same collection.
        Extra keyword arguments are accepted but not used.
        """
        source = self._instance.get_collection(collection_id)

        features = [entry.to_dict() for entry in source.get_items()]

        probe = self.search(collections=[collection_id], limit=1, max_items=1)
        total = probe['context']['matched']

        return self._items_result(features, matched=total)
118+
119+
120+
class STACLegacy(BaseSTAC):
    """Define structure to add support for legacy versions of STAC server.

    This implementation uses `requests.Session <https://requests.readthedocs.io/en/latest/user/advanced/#session-objects>`_
    to communicate with STAC legacy versions 0.8x, 0.9x directly.

    By default, SSL certificate verification is disabled (``verify=False``).
    You may enable it using ``verify=True``.
    """

    def __init__(self, uri: str, params=None, headers=None, verify=False, **kwargs):
        """Build STAC instance.

        Args:
            uri: Base URL of the STAC server.
            params: Optional query-string parameters sent with every request.
            headers: Optional HTTP headers sent with every request.
            verify: Value assigned to the session ``verify`` attribute.
            **kwargs: Extra options forwarded to the base class.
        """
        super().__init__(uri, params, headers, **kwargs)

        # Requests are always issued with concrete dicts, even when the caller gave None.
        self._params = params or {}
        self._headers = headers or {}
        self._session = requests.session()
        self._session.verify = verify

    def search(self, **parameters) -> dict:
        """Search for collection items on STAC server.

        The ``query`` entry, when present, is dropped before sending. If the
        first request fails and an ``intersects`` geometry was given, the
        request is retried once using the geometry bounds as ``bbox``.

        Raises:
            Exception: The original error, when the request fails and there is
                no ``intersects`` geometry to fall back on.
        """
        options = deepcopy(parameters)
        # Remove unsupported values
        options.pop('query', None)
        url = self._url_resource('search')

        try:
            response = self._request(url, method='POST', data=options, headers=self._headers, params=self._params)
        except Exception:
            # Catch ``Exception`` (not a bare ``except``) so KeyboardInterrupt/SystemExit
            # are never swallowed by the fallback.
            # Use bbox instead
            geom = options.pop('intersects', None)
            if geom is None:
                raise

            options['bbox'] = shapely.geometry.shape(geom).bounds

            response = self._request(url, method='POST', data=options, headers=self._headers, params=self._params)

        return response

    def _request(self, uri: str, method: str = 'GET', data=None, headers=None, params=None):
        """Issue an HTTP request through the session and return the decoded JSON body.

        ``data`` is sent as the JSON payload. Any status code other than 200
        is forwarded to ``abort`` together with the response content.
        """
        response = self._session.request(method, uri, headers=headers, params=params, json=data)
        if response.status_code != 200:
            abort(response.status_code, response.content)
        return response.json()

    def collections(self) -> List[dict]:
        """Retrieve the collections from STAC."""
        uri = self._url_resource('collections')
        return self._request(uri, params=self._params, headers=self._headers)

    def collection(self, collection_id: str) -> dict:
        """Access STAC Collection."""
        uri = self._url_resource(f'collections/{collection_id}')
        return self._request(uri, params=self._params, headers=self._headers)

    def items(self, collection_id: str, **kwargs) -> dict:
        """Access STAC Collection Items.

        Implemented as a ``search`` on the collection with ``limit=1``;
        extra keyword arguments are accepted but not used.
        """
        return self.search(collections=[collection_id], limit=1)

    def _url_resource(self, resource: str) -> str:
        """Build the URL of ``resource`` relative to the server base URI.

        Trailing slashes on the base URI are normalized so that a URI given
        as ``http://host/stac/`` does not yield ``http://host/stac//resource``.
        """
        return urljoin(self.uri.rstrip('/') + '/', resource)
186+
187+
188+
def build_stac(uri, headers=None, **parameters) -> BaseSTAC:
    """Build the STAC adapter matching the version announced by the server.

    The document at ``uri`` is fetched and its ``stac_version`` inspected:
    versions starting with ``0.`` produce a ``STACLegacy``, anything else a
    ``STACV1``. Extra keyword arguments are sent as query parameters and
    handed to the adapter as ``params``.

    Raises:
        RuntimeError: When the document has no ``stac_version`` value.
    """
    reply = requests.get(uri, timeout=15, headers=headers, params=parameters)
    reply.raise_for_status()

    version = reply.json().get('stac_version')
    if not version:
        raise RuntimeError(f'Invalid STAC "{uri}", missing "stac_version"')

    adapter = STACLegacy if version.startswith('0.') else STACV1
    return adapter(uri, params=parameters, headers=headers)

cube_builder/maestro.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@
3737
from celery import chain, group
3838
from geoalchemy2 import func
3939
from geoalchemy2.shape import to_shape
40-
from stac import STAC
4140

4241
# Cube Builder
42+
from ._adapter import BaseSTAC, build_stac
4343
from .celery.tasks import prepare_blend, warp_merge
4444
from .config import Config
4545
from .constants import CLEAR_OBSERVATION_NAME, DATASOURCE_NAME, PROVENANCE_NAME, TOTAL_OBSERVATION_NAME
@@ -131,7 +131,7 @@ def __init__(self, datacube: str, collections: List[str], tiles: List[str], star
131131
self.tiles = []
132132
self.export_files = self.properties.pop('export_files', None)
133133

134-
def get_stac(self, collection: str) -> STAC:
134+
def get_stac(self, collection: str) -> BaseSTAC:
135135
"""Retrieve STAC client which provides the given collection.
136136
137137
By default, it searches for given collection on Brazil Data Cube STAC.
@@ -152,7 +152,7 @@ def get_stac(self, collection: str) -> STAC:
152152
# Search in INPE STAC
153153
return self._stac(collection, 'http://cdsr.dpi.inpe.br/inpe-stac/stac')
154154

155-
def _stac(self, collection: str, url: str, **kwargs) -> STAC:
155+
def _stac(self, collection: str, url: str, **kwargs) -> BaseSTAC:
156156
"""Check if collection is provided by given STAC url.
157157
158158
The provided STAC must follow the `SpatioTemporal Asset Catalogs spec <https://stacspec.org/>`_.
@@ -172,9 +172,7 @@ def _stac(self, collection: str, url: str, **kwargs) -> STAC:
172172
if kwargs.get('token'):
173173
options['access_token'] = kwargs.get('token')
174174

175-
stac = self.cached_stacs.get(url) or STAC(url, **options)
176-
177-
_ = stac.catalog
175+
stac = self.cached_stacs.get(url) or build_stac(url, **options)
178176

179177
_ = stac.collection(collection)
180178

@@ -614,16 +612,16 @@ def search_images(self, feature: dict, start: str, end: str, tile_id: str, **kwa
614612
stac_collection = stac.collection(dataset)
615613
if stac_collection.get('summaries') and stac_collection['summaries'].get('platform'):
616614
platforms = platforms.union(set(stac_collection['summaries'].get('platform')))
617-
elif stac_collection.properties.get('platform'):
618-
platforms = platforms.union(set(stac_collection.properties.get('platform')))
615+
elif stac_collection.get('properties').get('platform'):
616+
platforms = platforms.union(set(stac_collection.get('properties').get('platform')))
619617

620618
token = ''
621619

622620
print('Searching for {} - {} ({}, {}) using {}...'.format(dataset, tile_id, start,
623-
end, stac.url), end='', flush=True)
621+
end, stac.uri), end='', flush=True)
624622

625623
with timing(' total'):
626-
items = stac.search(filter=options)
624+
items = stac.search(**options)
627625

628626
for feature in items['features']:
629627
if feature['type'] == 'Feature':
@@ -632,7 +630,7 @@ def search_images(self, feature: dict, start: str, end: str, tile_id: str, **kwa
632630
identifier = feature['id']
633631
# TODO: Add handler to deal with parse result serializer.
634632
platform = feature['properties'].get('platform')
635-
if stac.url.startswith('https://landsatlook.usgs.gov'):
633+
if stac.uri.startswith('https://landsatlook.usgs.gov'):
636634
# Remove last SR sentence.
637635
identifier = f'{identifier[:-3]}{identifier[-3:].replace("_SR", "")}'
638636
# Special treatment for missing/invalid platform values

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@
7575
'rio_cogeo==3.0.2',
7676
'shapely>=1.7,<2',
7777
'SQLAlchemy-Utils>=0.34.2,<1',
78-
'stac.py==0.9.0.post12',
78+
'pystac-client>=0.5',
7979
'MarkupSafe==2.0.1',
8080
'bdc-auth-client @ git+https://github.com/brazil-data-cube/bdc-auth-client.git@v0.2.1#egg=bdc-auth-client'
8181
]

0 commit comments

Comments
 (0)