Skip to content

Commit 06be133

Browse files
Add quicklook to copernicus client
1 parent 3794b11 commit 06be133

5 files changed

Lines changed: 167 additions & 15 deletions

File tree

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- `tilebox-storage`: Added `USGSLandsatStorageClient` to download Landsat data from the USGS Landsat S3 bucket.
13+
- `tilebox-storage`: Storage clients now support concurrent downloads of multiple objects, controlled by the
14+
`max_concurrent_downloads` parameter.
15+
- `tilebox-storage`: Added `quicklook` and `download_quicklook` methods to the `CopernicusStorageClient` to download and
16+
display preview images for Sentinel data.
17+
1018
## [0.41.0] - 2025-08-01
1119

1220
### Added

tilebox-storage/tests/storage_data.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,14 @@ def alphanumerical_text(draw: DrawFn, min_size: int = 1, max_size: int = 100) ->
5151
@composite
5252
def umbra_granules(draw: DrawFn) -> UmbraStorageGranule:
5353
"""Generate a realistic-looking random Umbra granule."""
54-
level = "L0"
5554
time = draw(datetimes(min_value=datetime(1990, 1, 1), max_value=datetime(2025, 1, 1), timezones=just(None)))
5655
number = draw(integers(min_value=1, max_value=2))
5756
text_location = draw(alphanumerical_text(min_size=1, max_size=20))
5857
granule_id = str(draw(uuids(version=4)))
5958
granule_name = f"{time:%Y-%m-%d-%H-%M-%S}_UMBRA-{number:02d}"
6059
location = str(Path(text_location) / granule_id / granule_name)
6160

62-
return UmbraStorageGranule(time, granule_name, level, location)
61+
return UmbraStorageGranule(time, granule_name, location)
6362

6463

6564
@composite
@@ -85,8 +84,7 @@ def s5p_granules(draw: DrawFn) -> CopernicusStorageGranule:
8584
# /eodata/Sentinel-5P/TROPOMI/L2__AER_LH/2024/04/15/S5P_NRTI_L2__AER_LH_20240415T055540_20240415T060040_33707_03_020600_20240415T063447
8685
location = f"/eodata/Sentinel-5P/{instrument}/{product_type}/{start:%Y}/{start:%m}/{start:%d}/{granule_name.removesuffix('.nc')}"
8786

88-
file_size = draw(integers(min_value=10_000, max_value=999_999_999))
89-
return CopernicusStorageGranule(start, granule_name, location, file_size)
87+
return CopernicusStorageGranule(start, granule_name, location)
9088

9189

9290
@composite

tilebox-storage/tests/test_granule.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
UmbraStorageGranule,
1212
USGSLandsatStorageGranule,
1313
_asf_download_urls,
14+
_thumbnail_relative_to_eodata_location,
1415
)
1516

1617

@@ -59,7 +60,6 @@ def _umbra_granule_to_datapoint(granule: UmbraStorageGranule) -> xr.Dataset:
5960
datapoint = xr.Dataset()
6061
datapoint.coords["time"] = np.array(granule.time).astype("datetime64[ns]")
6162
datapoint["granule_name"] = granule.granule_name
62-
datapoint["processing_level"] = granule.processing_level
6363
datapoint["location"] = granule.location
6464
return datapoint
6565

@@ -82,12 +82,51 @@ def test_granule_from_umbra_datapoints(granules: list[UmbraStorageGranule]) -> N
8282
assert UmbraStorageGranule.from_data(dataset.isel(time=i)) == granules[i]
8383

8484

85+
@pytest.mark.parametrize(
86+
("thumbnail_url", "location", "expected"),
87+
[
88+
(
89+
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE/preview/thumbnail.png",
90+
"/eodata/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE",
91+
"preview/thumbnail.png",
92+
),
93+
(
94+
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-2/MSI/L1C/2025/08/07/S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945.SAFE/S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945-ql.jpg",
95+
"/eodata/Sentinel-2/MSI/L1C/2025/08/07/S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945.SAFE",
96+
"S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945-ql.jpg",
97+
),
98+
(
99+
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-3/OLCI/OL_2_LFR___/2025/08/07/S3A_OL_2_LFR____20250807T011653_20250807T011953_20250807T033036_0179_129_074_1620_PS1_O_NR_003.SEN3/quicklook.jpg",
100+
"/eodata/Sentinel-3/OLCI/OL_2_LFR___/2025/08/07/S3A_OL_2_LFR____20250807T011653_20250807T011953_20250807T033036_0179_129_074_1620_PS1_O_NR_003.SEN3",
101+
"quicklook.jpg",
102+
),
103+
(
104+
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-3/SLSTR/SL_1_RBT___/2025/08/07/S3B_SL_1_RBT____20250807T002314_20250807T002614_20250807T025411_0179_109_316_0720_ESA_O_NR_004.SEN3/quicklook.jpg",
105+
"/eodata/Sentinel-3/SLSTR/SL_1_RBT___/2025/08/07/S3B_SL_1_RBT____20250807T002314_20250807T002614_20250807T025411_0179_109_316_0720_ESA_O_NR_004.SEN3",
106+
"quicklook.jpg",
107+
),
108+
(
109+
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-3/SYNERGY/SY_2_VG1___/2025/08/04/S3A_SY_2_VG1____20250804T000000_20250804T235959_20250806T202029_AUSTRALASIA_______PS1_O_NT_002.SEN3/quicklook.jpg",
110+
"/eodata/Sentinel-3/SYNERGY/SY_2_VG1___/2025/08/04/S3A_SY_2_VG1____20250804T000000_20250804T235959_20250806T202029_AUSTRALASIA_______PS1_O_NT_002.SEN3",
111+
"quicklook.jpg",
112+
),
113+
],
114+
)
115+
def test_thumbnail_relative_to_eodata_location(thumbnail_url: str, location: str, expected: str) -> None:
116+
assert (
117+
_thumbnail_relative_to_eodata_location(
118+
thumbnail_url,
119+
location,
120+
)
121+
== expected
122+
)
123+
124+
85125
def _copernicus_granule_to_datapoint(granule: CopernicusStorageGranule) -> xr.Dataset:
86126
datapoint = xr.Dataset()
87127
datapoint.coords["time"] = np.array(granule.time).astype("datetime64[ns]")
88128
datapoint["granule_name"] = granule.granule_name
89129
datapoint["location"] = granule.location
90-
datapoint["file_size"] = granule.file_size
91130
return datapoint
92131

93132

tilebox-storage/tilebox/storage/aio.py

Lines changed: 82 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,23 @@ async def list_objects(self, datapoint: xr.Dataset | CopernicusStorageGranule) -
595595
596596
Returns:
597597
List of object keys available for the given datapoint, relative to the granule location."""
598-
return await list_object_paths(self._store, _copernicus_s3_prefix(datapoint))
598+
return await self._list_objects(datapoint)
599+
600+
async def _list_objects(self, datapoint: xr.Dataset | CopernicusStorageGranule) -> list[str]:
601+
"""List all available objects for a given datapoint.
602+
603+
Args:
604+
datapoint: The datapoint to list available objects for.
605+
606+
Returns:
607+
List of object keys available for the given datapoint, relative to the granule location."""
608+
609+
granule = CopernicusStorageGranule.from_data(datapoint)
610+
# special handling for Sentinel-5P, where the location is not a folder but a single file
611+
if granule.location.endswith(".nc"):
612+
return [Path(granule.granule_name).name]
613+
614+
return await list_object_paths(self._store, _copernicus_s3_prefix(granule))
599615

600616
async def download(
601617
self,
@@ -615,8 +631,10 @@ async def download(
615631
Returns:
616632
The path to the downloaded data directory.
617633
"""
618-
all_objects = await list_object_paths(self._store, _copernicus_s3_prefix(datapoint))
619-
return await self._download_objects(datapoint, all_objects, output_dir, show_progress, max_concurrent_downloads)
634+
granule = CopernicusStorageGranule.from_data(datapoint)
635+
636+
all_objects = await self._list_objects(granule)
637+
return await self._download_objects(granule, all_objects, output_dir, show_progress, max_concurrent_downloads)
620638

621639
async def download_objects(
622640
self,
@@ -652,7 +670,14 @@ async def _download_objects(
652670
show_progress: bool = True,
653671
max_concurrent_downloads: int = 4,
654672
) -> Path:
655-
prefix = _copernicus_s3_prefix(datapoint)
673+
granule = CopernicusStorageGranule.from_data(datapoint)
674+
prefix = _copernicus_s3_prefix(granule)
675+
single_file = False
676+
677+
# special handling for Sentinel-5P, where the location is not a folder but a single file
678+
if granule.location.endswith(".nc"):
679+
single_file = True
680+
prefix = str(Path(prefix).parent)
656681

657682
base_folder = output_dir or self._cache
658683
if base_folder is None:
@@ -663,8 +688,61 @@ async def _download_objects(
663688
return output_folder
664689

665690
await download_objects(self._store, prefix, objects, output_folder, show_progress, max_concurrent_downloads)
691+
if single_file:
692+
return output_folder / objects[0]
666693
return output_folder
667694

695+
async def download_quicklook(self, datapoint: xr.Dataset | CopernicusStorageGranule) -> Path:
696+
"""Download the quicklook image for a given datapoint.
697+
698+
Args:
699+
datapoint: The datapoint to download the quicklook for.
700+
701+
Raises:
702+
ValueError: If no quicklook is available for the given datapoint.
703+
704+
Returns:
705+
The path to the downloaded quicklook image.
706+
"""
707+
return await self._download_quicklook(datapoint)
708+
709+
async def quicklook(
    self, datapoint: xr.Dataset | CopernicusStorageGranule, width: int = 600, height: int = 600
) -> None:
    """Display the quicklook image for a given datapoint.

    Requires an IPython kernel to be running. If you are not using IPython, use download_quicklook instead.

    Args:
        datapoint: The datapoint to display the quicklook for.
        width: Display width of the image in pixels. Defaults to 600.
        height: Display height of the image in pixels. Defaults to 600.

    Raises:
        ImportError: In case IPython is not available.
        ValueError: If no quicklook is available for the given datapoint.
    """
    if Image is None:
        # Point users at the method that actually exists on this client.
        raise ImportError("IPython is not available, please use download_quicklook instead.")

    # Normalize once so the caption below can read the granule name and time.
    granule = CopernicusStorageGranule.from_data(datapoint)
    image_path = await self._download_quicklook(granule)
    _display_quicklook(image_path, width, height, f"<code>{granule.granule_name} © ESA {granule.time.year}</code>")
730+
731+
async def _download_quicklook(self, datapoint: xr.Dataset | CopernicusStorageGranule) -> Path:
732+
granule = CopernicusStorageGranule.from_data(datapoint)
733+
if granule.thumbnail is None:
734+
raise ValueError(f"No quicklook available for {granule.granule_name}")
735+
736+
prefix = _copernicus_s3_prefix(granule)
737+
output_folder = (
738+
self._cache / self._STORAGE_PROVIDER / Path(prefix)
739+
if self._cache is not None
740+
else Path.cwd() / self._STORAGE_PROVIDER
741+
)
742+
743+
await download_objects(self._store, prefix, [granule.thumbnail], output_folder, show_progress=False)
744+
return output_folder / granule.thumbnail
745+
668746

669747
def _landsat_s3_prefix(datapoint: xr.Dataset | USGSLandsatStorageGranule) -> str:
670748
granule = USGSLandsatStorageGranule.from_data(datapoint)

tilebox-storage/tilebox/storage/granule.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from dataclasses import dataclass
22
from datetime import datetime
3+
from pathlib import Path
34

45
import xarray as xr
56

@@ -64,7 +65,6 @@ def _asf_download_urls(granule_name: str) -> StorageURLs:
6465
class UmbraStorageGranule:
6566
time: datetime
6667
granule_name: str
67-
processing_level: str
6868
location: str
6969

7070
@classmethod
@@ -84,17 +84,34 @@ def from_data(cls, dataset: "xr.Dataset | UmbraStorageGranule") -> "UmbraStorage
8484
return cls(
8585
time,
8686
dataset.granule_name.item(),
87-
dataset.processing_level.item(),
8887
dataset.location.item(),
8988
)
9089

9190

91+
def _thumbnail_relative_to_eodata_location(thumbnail_url: str, location: str) -> str:
92+
"""
93+
Returns a thumbnail path from a URL as a path relative to a storage location.
94+
95+
For example:
96+
>>> _thumbnail_relative_to_location(
97+
>>> "https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE/preview/thumbnail.png",
98+
>>> "/eodata/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE"
99+
>>> )
100+
"preview/thumbnail.png"
101+
"""
102+
103+
url_path = thumbnail_url.split("?path=")[-1]
104+
url_path = url_path.removeprefix("/")
105+
location = location.removeprefix("/eodata/")
106+
return str(Path(url_path).relative_to(location))
107+
108+
92109
@dataclass
93110
class CopernicusStorageGranule:
94111
time: datetime
95112
granule_name: str
96113
location: str
97-
file_size: int
114+
thumbnail: str | None = None
98115

99116
@classmethod
100117
def from_data(cls, dataset: "xr.Dataset | CopernicusStorageGranule") -> "CopernicusStorageGranule":
@@ -110,11 +127,23 @@ def from_data(cls, dataset: "xr.Dataset | CopernicusStorageGranule") -> "Coperni
110127

111128
time = datetime.combine(dataset.time.dt.date.item(), dataset.time.dt.time.item())
112129

130+
location = dataset.location.item()
131+
132+
thumbnail_path = None
133+
if "thumbnail" in dataset:
134+
thumbnail_path = dataset.thumbnail.item().strip()
135+
136+
thumbnail = (
137+
_thumbnail_relative_to_eodata_location(thumbnail_path, location)
138+
if isinstance(thumbnail_path, str) and len(thumbnail_path) > 0
139+
else None
140+
)
141+
113142
return cls(
114143
time,
115144
dataset.granule_name.item(),
116-
dataset.location.item(),
117-
dataset.file_size.item(),
145+
location,
146+
thumbnail,
118147
)
119148

120149

0 commit comments

Comments
 (0)