Skip to content

Commit cf3183d

Browse files
committed
Add cloud storage support via fsspec (S3, GCS, Azure)
Read and write GeoTIFFs to/from cloud storage using fsspec as the filesystem abstraction layer. Any URI with a :// scheme (that isn't http/https) is routed through fsspec, which delegates to the appropriate backend: - s3://bucket/key.tif (requires s3fs) - gs://bucket/key.tif (requires gcsfs) - az://container/blob.tif (requires adlfs) - abfs://container/blob.tif (requires adlfs) - Any other fsspec-supported scheme (memory://, ftp://, etc.) Read: _CloudSource uses fsspec.core.url_to_fs() then fs.open() for full reads and range reads. Falls through to the same TIFF parsing pipeline as local files. Write: _write_bytes detects fsspec URIs and writes via fs.open() instead of the local atomic-rename path (which doesn't apply to cloud storage). If fsspec or the backend library isn't installed, a clear ImportError is raised with install instructions. 5 new tests using fsspec's memory:// filesystem for integration testing without real cloud credentials.
1 parent ed1e40f commit cf3183d

File tree

3 files changed

+186
-9
lines changed

3 files changed

+186
-9
lines changed

xrspatial/geotiff/_reader.py

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,57 @@ def close(self):
171171
pass
172172

173173

174+
_CLOUD_SCHEMES = ('s3://', 'gs://', 'az://', 'abfs://')
175+
176+
177+
def _is_fsspec_uri(path: str) -> bool:
178+
"""Check if a path is a fsspec-compatible URI (not http/https/local)."""
179+
if path.startswith(('http://', 'https://')):
180+
return False
181+
return '://' in path
182+
183+
184+
class _CloudSource:
185+
"""Cloud storage data source using fsspec.
186+
187+
Supports S3, GCS, Azure Blob Storage, and any other fsspec backend.
188+
Requires the appropriate library (s3fs, gcsfs, adlfs) to be installed.
189+
"""
190+
191+
def __init__(self, url: str, **storage_options):
192+
try:
193+
import fsspec
194+
except ImportError:
195+
raise ImportError(
196+
"fsspec is required to read from cloud storage. "
197+
"Install it with: pip install fsspec")
198+
self._url = url
199+
self._fs, self._path = fsspec.core.url_to_fs(url, **storage_options)
200+
self._size = self._fs.size(self._path)
201+
202+
def read_range(self, start: int, length: int) -> bytes:
203+
with self._fs.open(self._path, 'rb') as f:
204+
f.seek(start)
205+
return f.read(length)
206+
207+
def read_all(self) -> bytes:
208+
with self._fs.open(self._path, 'rb') as f:
209+
return f.read()
210+
211+
@property
212+
def size(self) -> int:
213+
return self._size
214+
215+
def close(self):
216+
pass
217+
218+
174219
def _open_source(source: str):
175-
"""Open a data source (local file or URL)."""
220+
"""Open a data source (local file, URL, or cloud path)."""
176221
if source.startswith(('http://', 'https://')):
177222
return _HTTPSource(source)
223+
if _is_fsspec_uri(source):
224+
return _CloudSource(source)
178225
return _FileSource(source)
179226

180227

@@ -615,13 +662,14 @@ def read_to_array(source: str, *, window=None, overview_level: int | None = None
615662
-------
616663
(np.ndarray, GeoInfo) tuple
617664
"""
618-
is_url = source.startswith(('http://', 'https://'))
619-
620-
if is_url:
665+
if source.startswith(('http://', 'https://')):
621666
return _read_cog_http(source, overview_level=overview_level, band=band)
622667

623-
# Local file: mmap for zero-copy access
624-
src = _FileSource(source)
668+
# Local file or cloud storage: read all bytes then parse
669+
if _is_fsspec_uri(source):
670+
src = _CloudSource(source)
671+
else:
672+
src = _FileSource(source)
625673
data = src.read_all()
626674

627675
try:

xrspatial/geotiff/_writer.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -812,9 +812,33 @@ def write(data: np.ndarray, path: str, *,
812812
force_bigtiff=bigtiff,
813813
)
814814

815-
# Write to a temp file then atomically rename, so concurrent writes to
816-
# the same path don't interleave and readers never see partial output.
815+
_write_bytes(file_bytes, path)
816+
817+
818+
def _is_fsspec_uri(path: str) -> bool:
819+
"""Check if a path is a fsspec-compatible URI."""
820+
if path.startswith(('http://', 'https://')):
821+
return False
822+
return '://' in path
823+
824+
825+
def _write_bytes(file_bytes: bytes, path: str) -> None:
826+
"""Write bytes to a local file (atomic) or cloud storage (via fsspec)."""
817827
import os
828+
829+
if _is_fsspec_uri(path):
830+
try:
831+
import fsspec
832+
except ImportError:
833+
raise ImportError(
834+
"fsspec is required to write to cloud storage. "
835+
"Install it with: pip install fsspec")
836+
fs, fspath = fsspec.core.url_to_fs(path)
837+
with fs.open(fspath, 'wb') as f:
838+
f.write(file_bytes)
839+
return
840+
841+
# Local file: write to temp file then atomically rename
818842
import tempfile
819843
dir_name = os.path.dirname(os.path.abspath(path))
820844
fd, tmp_path = tempfile.mkstemp(dir=dir_name, suffix='.tif.tmp')
@@ -823,7 +847,6 @@ def write(data: np.ndarray, path: str, *,
823847
f.write(file_bytes)
824848
os.replace(tmp_path, path) # atomic on POSIX
825849
except BaseException:
826-
# Clean up the temp file on any failure
827850
try:
828851
os.unlink(tmp_path)
829852
except OSError:

xrspatial/geotiff/tests/test_features.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,112 @@ def test_no_crs_no_wkt(self, tmp_path):
425425
# Big-endian pixel data
426426
# -----------------------------------------------------------------------
427427

428+
# -----------------------------------------------------------------------
429+
# Cloud storage (fsspec) support
430+
# -----------------------------------------------------------------------
431+
432+
class TestCloudStorage:
433+
434+
def test_cloud_scheme_detection(self):
435+
"""Cloud URI schemes are detected correctly."""
436+
from xrspatial.geotiff._reader import _is_fsspec_uri
437+
assert _is_fsspec_uri('s3://bucket/key.tif')
438+
assert _is_fsspec_uri('gs://bucket/key.tif')
439+
assert _is_fsspec_uri('az://container/blob.tif')
440+
assert _is_fsspec_uri('abfs://container/blob.tif')
441+
assert _is_fsspec_uri('memory:///test.tif')
442+
assert not _is_fsspec_uri('/local/path.tif')
443+
assert not _is_fsspec_uri('http://example.com/file.tif')
444+
assert not _is_fsspec_uri('relative/path.tif')
445+
446+
def test_memory_filesystem_read_write(self, tmp_path):
447+
"""Round-trip through fsspec's in-memory filesystem."""
448+
import fsspec
449+
450+
arr = np.arange(16, dtype=np.float32).reshape(4, 4)
451+
452+
# Write to memory filesystem via fsspec
453+
from xrspatial.geotiff._writer import write, _write_bytes
454+
from xrspatial.geotiff._writer import _assemble_tiff, _write_stripped
455+
from xrspatial.geotiff._compression import COMPRESSION_NONE
456+
457+
# First write locally, then copy to memory fs
458+
local_path = str(tmp_path / 'test.tif')
459+
write(arr, local_path, compression='none', tiled=False)
460+
461+
with open(local_path, 'rb') as f:
462+
tiff_bytes = f.read()
463+
464+
# Put into fsspec memory filesystem
465+
fs = fsspec.filesystem('memory')
466+
fs.pipe('/test.tif', tiff_bytes)
467+
468+
# Read via _CloudSource
469+
from xrspatial.geotiff._reader import _CloudSource
470+
src = _CloudSource('memory:///test.tif')
471+
data = src.read_all()
472+
assert len(data) == len(tiff_bytes)
473+
assert data == tiff_bytes
474+
475+
# Range read
476+
chunk = src.read_range(0, 8)
477+
assert chunk == tiff_bytes[:8]
478+
479+
# Clean up
480+
fs.rm('/test.tif')
481+
482+
def test_memory_filesystem_full_roundtrip(self, tmp_path):
483+
"""write_geotiff + read_geotiff through memory:// filesystem."""
484+
import fsspec
485+
486+
arr = np.arange(16, dtype=np.float32).reshape(4, 4)
487+
488+
# Write locally first, then copy to memory fs
489+
local_path = str(tmp_path / 'local.tif')
490+
write_geotiff(arr, local_path, compression='deflate')
491+
with open(local_path, 'rb') as f:
492+
tiff_bytes = f.read()
493+
494+
fs = fsspec.filesystem('memory')
495+
fs.pipe('/roundtrip.tif', tiff_bytes)
496+
497+
# Read from memory filesystem
498+
from xrspatial.geotiff._reader import read_to_array
499+
result, geo = read_to_array('memory:///roundtrip.tif')
500+
np.testing.assert_array_equal(result, arr)
501+
502+
fs.rm('/roundtrip.tif')
503+
504+
def test_writer_cloud_scheme_detection(self):
505+
"""Writer detects cloud schemes."""
506+
from xrspatial.geotiff._writer import _is_fsspec_uri
507+
assert _is_fsspec_uri('s3://bucket/key.tif')
508+
assert _is_fsspec_uri('gs://bucket/key.tif')
509+
assert _is_fsspec_uri('az://container/blob.tif')
510+
assert not _is_fsspec_uri('/local/path.tif')
511+
512+
def test_write_to_memory_filesystem(self, tmp_path):
513+
"""_write_bytes can write to fsspec memory filesystem."""
514+
import fsspec
515+
from xrspatial.geotiff._writer import write
516+
517+
arr = np.arange(16, dtype=np.float32).reshape(4, 4)
518+
local_path = str(tmp_path / 'src.tif')
519+
write(arr, local_path, compression='none', tiled=False)
520+
with open(local_path, 'rb') as f:
521+
tiff_bytes = f.read()
522+
523+
# Write via _write_bytes to memory filesystem
524+
from xrspatial.geotiff._writer import _write_bytes
525+
_write_bytes(tiff_bytes, 'memory:///written.tif')
526+
527+
fs = fsspec.filesystem('memory')
528+
assert fs.exists('/written.tif')
529+
assert fs.cat('/written.tif') == tiff_bytes
530+
531+
fs.rm('/written.tif')
532+
533+
428534
class TestBigEndian:
429535

430536
def test_float32_big_endian(self, tmp_path):

0 commit comments

Comments
 (0)