Skip to content

Commit 9818c61

Browse files
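Refactor S3 handling in GeoZarr conversion: normalize group paths so that joining the output path and a group prefix no longer produces a double slash, raise the --spatial-chunk and --min-dimension values in the debug launch configuration, and open S3 Zarr groups via storage options instead of a pre-built S3 store.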
1 parent 43ba30d commit 9818c61

3 files changed

Lines changed: 19 additions & 13 deletions

File tree

.vscode/launch.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@
4040
"https://objectstore.eodc.eu:2222/e05ab01a9d56408d82ac32d69a5aae2a:sample-data/tutorial_data/cpm_v253/S2B_MSIL1C_20250113T103309_N0511_R108_T32TLQ_20250113T122458.zarr",
4141
"s3://esa-zarr-sentinel-explorer/tests-output/eopf_geozarr/s2b_test.zarr",
4242
"--groups", "/measurements/reflectance/r10m", "/measurements/reflectance/r20m", "/measurements/reflectance/r60m", "/quality/l1c_quicklook/r10m",
43-
"--spatial-chunk", "512",
44-
"--min-dimension", "128",
43+
"--spatial-chunk", "4096",
44+
"--min-dimension", "256",
4545
"--tile-width", "256",
4646
"--max-retries", "2",
4747
"--verbose"
@@ -51,8 +51,8 @@
5151
"console": "integratedTerminal",
5252
"env": {
5353
"PYTHONPATH": "${workspaceFolder}/eopf_geozarr/.venv/bin",
54-
"AWS_ACCESS_KEY_ID": "04fd31bb339247658e971fad3a1d879e",
55-
"AWS_SECRET_ACCESS_KEY": "eb1b07f69b1d494abb947aa0bba8587e",
54+
"AWS_ACCESS_KEY_ID": "secret",
55+
"AWS_SECRET_ACCESS_KEY": "secret",
5656
"AWS_DEFAULT_REGION": "gra",
5757
"AWS_S3_ENDPOINT": "https://s3.gra.io.cloud.ovh.net/"
5858
},

eopf_geozarr/conversion/geozarr.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,11 @@ def recursive_copy(
280280
is_dataset = True
281281

282282
# Handle S3 vs local paths for zarr operations
283-
group_path = f"{output_path}/{group_prefix}"
283+
# Fix double slash issue by normalizing the path
284+
if group_prefix.startswith("/"):
285+
group_path = f"{output_path}{group_prefix}"
286+
else:
287+
group_path = f"{output_path}/{group_prefix}"
284288
if s3_utils.is_s3_path(output_path):
285289
# For S3, use storage_options
286290
storage_options = s3_utils.get_s3_storage_options(group_path)
@@ -468,13 +472,14 @@ def write_geozarr_group(
468472

469473
# Handle S3 vs local paths for zarr operations
470474
if s3_utils.is_s3_path(output_path):
471-
# For S3, use the S3 store
472-
store = s3_utils.create_s3_store(group_path)
475+
# For S3, use storage_options
476+
storage_options = s3_utils.get_s3_storage_options(group_path)
473477
dt.to_zarr(
474-
store,
478+
group_path,
475479
mode="a", # Append mode to add to the group
476480
consolidated=False, # Do not consolidate metadata
477481
zarr_format=3, # Use Zarr format 3
482+
storage_options=storage_options,
478483
)
479484
else:
480485
dt.to_zarr(
@@ -676,9 +681,10 @@ def create_geozarr_compliant_multiscales(
676681

677682
# Handle S3 vs local paths for JSON metadata
678683
if s3_utils.is_s3_path(output_path):
679-
# For S3, use s3fs to read/write JSON
684+
# For S3, use s3fs configured with the storage options from s3_utils
680685
import s3fs
681-
fs = s3fs.S3FileSystem(anon=False)
686+
storage_options = s3_utils.get_s3_storage_options(zarr_json_path)
687+
fs = s3fs.S3FileSystem(**storage_options)
682688

683689
with fs.open(zarr_json_path, "r") as f:
684690
zarr_json = json.load(f)

eopf_geozarr/conversion/s3_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def s3_path_exists(s3_path: str, **s3_kwargs) -> bool:
188188

189189
def open_s3_zarr_group(s3_path: str, mode: str = "r", **s3_kwargs) -> zarr.Group:
190190
"""
191-
Open a Zarr group from S3.
191+
Open a Zarr group from S3 using storage_options.
192192
193193
Parameters
194194
----------
@@ -204,8 +204,8 @@ def open_s3_zarr_group(s3_path: str, mode: str = "r", **s3_kwargs) -> zarr.Group
204204
zarr.Group
205205
Zarr group
206206
"""
207-
store = create_s3_store(s3_path, **s3_kwargs)
208-
return zarr.open_group(store, mode=mode, zarr_format=3)
207+
storage_options = get_s3_storage_options(s3_path, **s3_kwargs)
208+
return zarr.open_group(s3_path, mode=mode, zarr_format=3, storage_options=storage_options)
209209

210210

211211
def get_s3_credentials_info() -> Dict[str, Optional[str]]:

0 commit comments

Comments
 (0)