Skip to content

Commit 686e539

Browse files
Refactor GeoZarr conversion utilities
- Updated launch configuration for GeoZarr conversion to use S2L1C instead of S2L2A. - Moved S3 utility functions from `s3_utils.py` to `fs_utils.py` for better organization. - Simplified path handling by introducing unified functions for storage options and path normalization. - Removed redundant S3 checks in various functions, streamlining the code for local and S3 paths. - Enhanced metadata handling with unified read/write functions for JSON metadata across different path types.
1 parent 0524e03 commit 686e539

5 files changed

Lines changed: 262 additions & 211 deletions

File tree

.vscode/launch.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,16 @@
6060
},
6161
{
6262
// eopf_geozarr convert https://objectstore.eodc.eu:2222/e05ab01a9d56408d82ac32d69a5aae2a:sample-data/tutorial_data/cpm_v253/S2B_MSIL1C_20250113T103309_N0511_R108_T32TLQ_20250113T122458.zarr /tmp/tmp7mmjkjk3/s2b_subset_test.zarr --groups /measurements/reflectance/r10m --spatial-chunk 512 --min-dimension 128 --tile-width 256 --max-retries 2 --verbose
63-
"name": "Convert to GeoZarr S2L2A (S3)",
63+
"name": "Convert to GeoZarr S2L1C (S3)",
6464
"type": "debugpy",
6565
"request": "launch",
6666
"module": "eopf_geozarr",
6767
"args": [
6868
"convert",
69-
"https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202507-s02msil2a/04/products/cpm_v256/S2A_MSIL2A_20250704T094051_N0511_R036_T33SWB_20250704T115824.zarr",
70-
"s3://esa-zarr-sentinel-explorer-fra/tests-output/eopf_geozarr/s2b_test.zarr",
71-
"--groups", "/measurements/reflectance/r10m", "/measurements/reflectance/r20m", "/measurements/reflectance/r60m", "/quality/l2a_quicklook/r10m",
72-
"--spatial-chunk", "4096",
69+
"https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202507-s02msil1c/25/products/cpm_v256/S2A_MSIL1C_20250725T091041_N0511_R050_T34SGE_20250725T101000.zarr",
70+
"s3://esa-zarr-sentinel-explorer-fra/tests-output/eopf_geozarr/S2A_MSIL1C_20250725T091041_N0511_R050_T34SGE_20250725T101000.zarr",
71+
"--groups", "/measurements/reflectance/r10m", "/measurements/reflectance/r20m", "/measurements/reflectance/r60m", "/quality/l1c_quicklook/r10m",
72+
"--spatial-chunk", "1024",
7373
"--min-dimension", "256",
7474
"--tile-width", "256",
7575
"--max-retries", "2",

eopf_geozarr/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import xarray as xr
1414

1515
from . import create_geozarr_dataset
16-
from .conversion import is_s3_path, validate_s3_access, get_s3_credentials_info
16+
from .conversion.fs_utils import is_s3_path, validate_s3_access, get_s3_credentials_info
1717

1818

1919
def setup_dask_cluster(enable_dask: bool, verbose: bool = False) -> Optional[object]:

eopf_geozarr/conversion/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
recursive_copy,
99
setup_datatree_metadata_geozarr_spec_compliant,
1010
)
11-
from .s3_utils import (
11+
from .fs_utils import (
1212
create_s3_store,
1313
get_s3_credentials_info,
1414
is_s3_path,
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,57 @@ def get_s3_storage_options(s3_path: str, **s3_kwargs) -> Dict[str, Any]:
125125
return s3_config
126126

127127

128+
def get_storage_options(path: str, **kwargs) -> Optional[Dict[str, Any]]:
    """
    Resolve the storage options to use for a given path or URL.

    S3 URLs are delegated to ``get_s3_storage_options``; every other kind
    of path yields ``None``, so callers can pass the result along without
    branching on the path type themselves.

    Parameters
    ----------
    path : str
        Path or URL (local path, s3://, etc.)
    **kwargs
        Forwarded to ``get_s3_storage_options`` when ``path`` is an S3 URL;
        unused otherwise.

    Returns
    -------
    Optional[Dict[str, Any]]
        Storage options dictionary for S3 paths, ``None`` for anything else.
    """
    # Only S3 needs explicit options today; other protocols
    # (gcs://, azure://, ...) could be dispatched from here later.
    return get_s3_storage_options(path, **kwargs) if is_s3_path(path) else None
152+
153+
154+
def normalize_path(path: str) -> str:
    """
    Normalize any path type (local or remote URL).

    S3 URLs are handed to ``normalize_s3_path``. Other URLs that carry a
    scheme (``https://``, ``file://``, ...) are returned unchanged, and
    plain local paths are cleaned up with ``os.path.normpath`` (collapsing
    duplicate separators and resolving ``.``/``..`` segments).

    Parameters
    ----------
    path : str
        Path to normalize

    Returns
    -------
    str
        Normalized path
    """
    if is_s3_path(path):
        return normalize_s3_path(path)

    import os.path
    import re

    # os.path.normpath would collapse the "//" that follows a URL scheme
    # (e.g. "https://host/x" -> "https:/host/x"), corrupting the URL, so
    # non-S3 URLs are passed through untouched. The scheme must be at least
    # two characters long so Windows drive letters ("C://...") still count
    # as local paths.
    if re.match(r"[A-Za-z][A-Za-z0-9+.\-]+://", path):
        return path

    # For local paths, normalize by removing double slashes and cleaning up
    return os.path.normpath(path)
177+
178+
128179
def create_s3_store(s3_path: str, **s3_kwargs) -> str:
129180
"""
130181
Create an S3 path with storage options for Zarr operations.
@@ -346,3 +397,123 @@ def validate_s3_access(s3_path: str, **s3_kwargs) -> tuple[bool, Optional[str]]:
346397

347398
except Exception as e:
348399
return False, str(e)
400+
401+
402+
def get_filesystem(path: str, **kwargs):
    """
    Return the fsspec filesystem that matches the given path.

    Parameters
    ----------
    path : str
        Path or URL (local path, s3://, etc.)
    **kwargs
        Forwarded to ``get_s3_storage_options`` when ``path`` is an S3 URL;
        not used for local paths.

    Returns
    -------
    fsspec.AbstractFileSystem
        An ``'s3'`` filesystem built from the S3 storage options, or the
        ``'file'`` filesystem for everything else.
    """
    import fsspec

    # Anything that is not S3 is handled by the local filesystem.
    if not is_s3_path(path):
        return fsspec.filesystem('file')

    # S3: build the storage options first, then pass them to fsspec.
    s3_options = get_s3_storage_options(path, **kwargs)
    return fsspec.filesystem('s3', **s3_options)
427+
428+
429+
def write_json_metadata(path: str, metadata: Dict[str, Any], **kwargs) -> None:
    """
    Serialize a metadata dictionary as JSON and write it through fsspec.

    Parameters
    ----------
    path : str
        Destination of the JSON file (local path or URL)
    metadata : dict
        Metadata dictionary to write as JSON (indented by two spaces)
    **kwargs
        Forwarded to ``get_filesystem``
    """
    filesystem = get_filesystem(path, **kwargs)

    # Only local targets get their parent directory created; S3 paths
    # skip this step entirely.
    target_dir = "" if is_s3_path(path) else os.path.dirname(path)
    if target_dir:
        filesystem.makedirs(target_dir, exist_ok=True)

    # Serialize before opening so a serialization error never leaves a
    # freshly truncated file behind.
    serialized = json.dumps(metadata, indent=2)
    with filesystem.open(path, "w") as handle:
        handle.write(serialized)
454+
455+
456+
def read_json_metadata(path: str, **kwargs) -> Dict[str, Any]:
    """
    Load a JSON metadata file from a local path or URL through fsspec.

    Parameters
    ----------
    path : str
        Location of the JSON file (local path or URL)
    **kwargs
        Forwarded to ``get_filesystem``

    Returns
    -------
    dict
        The parsed JSON content
    """
    # Read the whole document first, then parse once the file is closed.
    with get_filesystem(path, **kwargs).open(path, "r") as handle:
        raw_text = handle.read()

    return json.loads(raw_text)
478+
479+
480+
def path_exists(path: str, **kwargs) -> bool:
    """
    Tell whether a path exists, using the fsspec filesystem for that path.

    Parameters
    ----------
    path : str
        Path to check (local path or URL)
    **kwargs
        Forwarded to ``get_filesystem``

    Returns
    -------
    bool
        Result of the filesystem's ``exists`` check for ``path``
    """
    return get_filesystem(path, **kwargs).exists(path)
498+
499+
500+
def open_zarr_group(path: str, mode: str = "r", **kwargs) -> zarr.Group:
    """
    Open a Zarr v3 group located at a local path or URL.

    Storage options are resolved through ``get_storage_options`` so the
    same call works for every supported path type.

    Parameters
    ----------
    path : str
        Path to the Zarr group (local path or URL)
    mode : str, default "r"
        Access mode ("r", "r+", "w", "a")
    **kwargs
        Forwarded to ``get_storage_options``

    Returns
    -------
    zarr.Group
        The opened Zarr group
    """
    return zarr.open_group(
        path,
        mode=mode,
        zarr_format=3,
        storage_options=get_storage_options(path, **kwargs),
    )

0 commit comments

Comments
 (0)