Skip to content

Commit 8f089f7

Browse files
Progress bars for ingestion/deletion (#188)
* Rename timeseries dataset and collection to just dataset/collection * Ingest/delete in batches * Prepare release v0.35
1 parent 941e5e5 commit 8f089f7

14 files changed

Lines changed: 2283 additions & 2146 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.35.0] - 2025-04-29
11+
1012
## Added
1113

1214
- Support for ingestion of enum values into datasets
15+
- Pagination support for ingestion / deletion of large numbers of datapoints
16+
17+
## Changed
18+
19+
- Renamed `TimeseriesDataset` to `DatasetClient`
20+
- Renamed `TimeseriesCollection` to `CollectionClient`
1321

1422
## [0.34.0] - 2025-04-15
1523

@@ -153,7 +161,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
153161
- Released packages: `tilebox-datasets`, `tilebox-workflows`, `tilebox-storage`, `tilebox-grpc`
154162

155163

156-
[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.34.0...HEAD
164+
[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.35.0...HEAD
165+
[0.35.0]: https://github.com/tilebox/tilebox-python/compare/v0.34.0...v0.35.0
157166
[0.34.0]: https://github.com/tilebox/tilebox-python/compare/v0.33.1...v0.34.0
158167
[0.33.1]: https://github.com/tilebox/tilebox-python/compare/v0.33.0...v0.33.1
159168
[0.33.0]: https://github.com/tilebox/tilebox-python/compare/v0.32.1...v0.33.0

tilebox-datasets/tests/test_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from _tilebox.grpc.error import NotFoundError
1111
from _tilebox.grpc.replay import open_recording_channel, open_replay_channel
12-
from tilebox.datasets import Client, TimeseriesDataset
12+
from tilebox.datasets import Client, DatasetClient
1313
from tilebox.datasets.data.datapoint import DatapointPage
1414
from tilebox.datasets.data.time_interval import us_to_datetime
1515

@@ -53,7 +53,7 @@ def test_list_datasets() -> None:
5353

5454
datasets = client.datasets()
5555
# let's check that we can access a dataset
56-
assert isinstance(datasets.open_data.copernicus.sentinel2_msi, TimeseriesDataset)
56+
assert isinstance(datasets.open_data.copernicus.sentinel2_msi, DatasetClient)
5757
# let's check that the repr contains the summaries of the datasets
5858
assert "sentinel2_msi" in repr(datasets)
5959
assert "Sentinel-2 is equipped with an optical instrument payload that samples" in repr(datasets)

tilebox-datasets/tests/test_timeseries.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from tests.data.collection import collection_infos, collection_names
1515
from tests.data.datapoint import datapoints, paginated_datapoint_for_interval_responses
1616
from tests.data.datasets import example_dataset_type
17-
from tilebox.datasets import TimeseriesCollection, TimeseriesDataset
17+
from tilebox.datasets import CollectionClient, DatasetClient
1818
from tilebox.datasets.data.collection import Collection, CollectionInfo
1919
from tilebox.datasets.data.datapoint import Datapoint, DatapointPage
2020
from tilebox.datasets.data.datasets import Dataset
@@ -37,13 +37,13 @@
3737
from tilebox.datasets.service import TileboxDatasetService
3838

3939

40-
def _mocked_dataset() -> tuple[TimeseriesDataset, MagicMock]:
40+
def _mocked_dataset() -> tuple[DatasetClient, MagicMock]:
4141
service = MagicMock()
4242

4343
# we do not sample/draw from datasets() here, because the values themselves are irrelevant for the tests
4444
# (we are not testing properties, but rather writing conventional unit tests here, so it doesn't make sense to
4545
# run them multiple times)
46-
dataset = TimeseriesDataset(
46+
dataset = DatasetClient(
4747
service,
4848
Dataset(
4949
id=uuid4(),
@@ -85,7 +85,7 @@ def test_timeseries_dataset_list_collections(infos: list[CollectionInfo]) -> Non
8585

8686
for info in infos:
8787
collection = collections[info.collection.name]
88-
assert isinstance(collection, TimeseriesCollection), "Expected a RemoteTimeseriesDatasetCollection"
88+
assert isinstance(collection, CollectionClient), "Expected a RemoteTimeseriesDatasetCollection"
8989
assert collection.name == info.collection.name, "Name mismatch in collection"
9090
assert repr(info) in repr(collection), "Expected info to be in collection repr"
9191
assert collection._info == info, "Expected info to be cached"
@@ -108,9 +108,9 @@ def test_timeseries_dataset_get_collection(collection_name: str) -> None:
108108

109109
@dataclass
110110
class MockedCollection:
111-
dataset: TimeseriesDataset
111+
dataset: DatasetClient
112112
dataset_info: Dataset
113-
collection: TimeseriesCollection
113+
collection: CollectionClient
114114
collection_info: CollectionInfo
115115
service: MagicMock
116116

@@ -312,31 +312,31 @@ def __init__(self) -> None:
312312
self.dataset_client = dataset_client
313313
self.count_collections = 0
314314

315-
inserted_collections: Bundle[TimeseriesCollection] = Bundle("collections")
315+
inserted_collections: Bundle[CollectionClient] = Bundle("collections")
316316

317317
@rule(target=inserted_collections, collection=collection_infos())
318-
def get_or_create_collection_enfore_create(self, collection: CollectionInfo) -> TimeseriesCollection:
318+
def get_or_create_collection_enfore_create(self, collection: CollectionInfo) -> CollectionClient:
319319
collections = self.dataset_client.collections()
320320
assume(collection.collection.name not in collections)
321321

322322
self.count_collections += 1
323323
return self.dataset_client.get_or_create_collection(collection.collection.name)
324324

325325
@rule(collection=inserted_collections)
326-
def get_or_create_collection_enfore_get(self, collection: TimeseriesCollection) -> None:
326+
def get_or_create_collection_enfore_get(self, collection: CollectionClient) -> None:
327327
got = self.dataset_client.get_or_create_collection(collection.name)
328328
assert got.info() == collection.info()
329329

330330
@rule(target=inserted_collections, collection=collection_infos())
331-
def create_collection(self, collection: CollectionInfo) -> TimeseriesCollection:
331+
def create_collection(self, collection: CollectionInfo) -> CollectionClient:
332332
collections = self.dataset_client.collections()
333333
assume(collection.collection.name not in collections)
334334

335335
self.count_collections += 1
336336
return self.dataset_client.create_collection(collection.collection.name)
337337

338338
@rule(collection=inserted_collections)
339-
def get_collection(self, collection: TimeseriesCollection) -> None:
339+
def get_collection(self, collection: CollectionClient) -> None:
340340
got = self.dataset_client.collection(collection.name)
341341
assert got.info() == collection.info()
342342

tilebox-datasets/tilebox/datasets/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
from loguru import logger
55

6+
# kept only for backwards compatibility with older imports
7+
from tilebox.datasets.aio.timeseries import TimeseriesCollection, TimeseriesDataset
68
from tilebox.datasets.sync.client import Client
7-
from tilebox.datasets.sync.timeseries import TimeseriesCollection, TimeseriesDataset
9+
from tilebox.datasets.sync.dataset import CollectionClient, DatasetClient
810

9-
__all__ = ["Client", "TimeseriesCollection", "TimeseriesDataset"]
11+
__all__ = ["Client", "CollectionClient", "DatasetClient", "TimeseriesCollection", "TimeseriesDataset"]
1012

1113

1214
def _init_logging(level: str = "INFO") -> None:
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
from tilebox.datasets.aio.client import Client
2+
from tilebox.datasets.aio.dataset import CollectionClient, DatasetClient
3+
4+
# kept only for backwards compatibility with older imports
25
from tilebox.datasets.aio.timeseries import TimeseriesCollection, TimeseriesDataset
36

4-
__all__ = ["Client", "TimeseriesCollection", "TimeseriesDataset"]
7+
__all__ = ["Client", "CollectionClient", "DatasetClient", "TimeseriesCollection", "TimeseriesDataset"]

tilebox-datasets/tilebox/datasets/aio/client.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from _tilebox.grpc.aio.channel import open_channel
44
from _tilebox.grpc.aio.error import with_pythonic_errors
5-
from tilebox.datasets.aio.timeseries import TimeseriesDataset
5+
from tilebox.datasets.aio.dataset import DatasetClient
66
from tilebox.datasets.client import Client as BaseClient
77
from tilebox.datasets.client import token_from_env
88
from tilebox.datasets.datasetsv1.collections_pb2_grpc import CollectionServiceStub
@@ -33,10 +33,10 @@ def __init__(self, *, url: str = "https://api.tilebox.com", token: str | None =
3333
self._client = BaseClient(service)
3434

3535
async def datasets(self) -> Group:
36-
return await self._client.datasets(TimeseriesDataset)
36+
return await self._client.datasets(DatasetClient)
3737

38-
async def dataset(self, slug: str) -> TimeseriesDataset:
39-
return await self._client.dataset(slug, TimeseriesDataset)
38+
async def dataset(self, slug: str) -> DatasetClient:
39+
return await self._client.dataset(slug, DatasetClient)
4040

41-
async def _dataset_by_id(self, dataset_id: str | UUID) -> TimeseriesDataset:
42-
return await self._client._dataset_by_id(dataset_id, TimeseriesDataset) # noqa: SLF001
41+
async def _dataset_by_id(self, dataset_id: str | UUID) -> DatasetClient:
42+
return await self._client._dataset_by_id(dataset_id, DatasetClient) # noqa: SLF001

0 commit comments

Comments
 (0)