Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
- Added `get_starting_time()` and `get_duration()` methods to `TimeSeries` to get the starting time and duration of the time series. @h-mayorquin [#2146](https://github.com/NeurodataWithoutBorders/pynwb/pull/2146)
- Added `get_starting_time()` and `get_duration()` methods to `TimeIntervals` to get the earliest start time and total duration (span from earliest start to latest stop) of all intervals. @h-mayorquin [#2146](https://github.com/NeurodataWithoutBorders/pynwb/pull/2146)
- Added `get_starting_time()` and `get_duration()` methods to `Units` to get the earliest spike time and total duration (span from earliest to latest spike) across all units. @h-mayorquin [#2164](https://github.com/NeurodataWithoutBorders/pynwb/pull/2164)
- Added remote-read support to `pynwb.read_nwb`. The function now accepts remote URLs (`s3://`, `gs://`, `abfs://`, `https://`, etc.) and dispatches to the right backend based on URL suffix. Anonymous public files just work; credentialed remote access is picked up from the standard cloud-credentials environment (AWS profile, `GOOGLE_APPLICATION_CREDENTIALS`, Azure managed identity, etc.). The module-level signature stays minimal at `read_nwb(path)`. Power-user knobs (forced ROS3 driver, custom S3-compatible endpoints, pre-opened file objects) are available via `NWBHDF5IO.read_nwb(path, backend_kwargs=...)` directly, which gains a new `backend_kwargs` parameter. The existing `fsspec.filesystem("http")` branch in `NWBHDF5IO.read_nwb` that incorrectly handled all remote schemes is now scheme-aware. @h-mayorquin [#2190](https://github.com/NeurodataWithoutBorders/pynwb/pull/2190)

### Fixed
- Fixed invalid CSS properties in documentation assistant toggle that prevented proper positioning on displays ≥1400px wide. @rly [#2151](https://github.com/NeurodataWithoutBorders/pynwb/pull/2151)
Expand Down
8 changes: 6 additions & 2 deletions requirements-opt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,9 @@ fsspec==2025.5.1
requests==2.33.0
aiohttp==3.13.4

# For read_nwb tests
hdmf-zarr
# For read_nwb tests.
# TEMPORARY: pinned to the branch of hdmf-dev/hdmf-zarr#348 which fixes
# `resolve_ref` for fsspec self-references. The remote-Zarr test in
# tests/integration/io/test_read.py depends on this fix. Once #348 lands in
# an hdmf-zarr release, revert this line to `hdmf-zarr`.
hdmf-zarr @ git+https://github.com/hdmf-dev/hdmf-zarr.git@fix_streaming_store
46 changes: 39 additions & 7 deletions src/pynwb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,9 +552,10 @@ def read_nwb(**kwargs):
path = str(path) if path is not None else None

# Streaming case
if path is not None and (path.startswith("s3://") or path.startswith("http")):
if path is not None and path.startswith(_REMOTE_SCHEMES):
import fsspec
fsspec_file_system = fsspec.filesystem("http")
scheme = path.split("://", 1)[0]
fsspec_file_system = fsspec.filesystem(scheme)
ffspec_file = fsspec_file_system.open(path, "rb")

open_file = h5py.File(ffspec_file, "r")
Expand All @@ -566,12 +567,28 @@ def read_nwb(**kwargs):

return nwbfile

# URL prefixes that mark a path as remote rather than local. Kept as a tuple so
# the whole set can be handed to ``str.startswith`` in a single call.
_REMOTE_SCHEMES = (
    # Amazon S3 and S3-compatible object stores (MinIO, Ceph, Hetzner Object Storage, ...)
    "s3://",
    # Google Cloud Storage: canonical scheme first, then the alternative spelling
    "gs://", "gcs://",
    # Azure: Blob Storage short form, Data Lake Storage Gen2 over HTTP and over
    # HTTPS (recommended), and the legacy WASB scheme over HTTPS
    "az://", "abfs://", "abfss://", "wasbs://",
    # Generic web access without and with TLS; this also covers DANDI signed URLs
    # and any other HTTP-accessible store
    "http://", "https://",
    # FTP, plain and over TLS
    "ftp://", "ftps://",
)


@docval({'name': 'path', 'type': (str, Path),
'doc': 'Path to the NWB file. Can be either a local filesystem path to '
'an HDF5 (.nwb) or Zarr (.zarr) file.'},
'doc': ("Path to the NWB file. Can be a local filesystem path to an HDF5 (.nwb) "
"or Zarr (.zarr) file, or a remote URL "
"(`s3://`, `gs://`, `abfs://`, `https://`, etc.).")},
is_method=False)
def read_nwb(**kwargs):
"""Read an NWB file from a local path.
"""Read an NWB file from a local path or remote URL.

High-level interface for reading NWB files. Automatically handles both HDF5
and Zarr formats. For advanced use cases (parallel I/O, custom namespaces),
Expand All @@ -588,25 +605,40 @@ def read_nwb(**kwargs):
* Reads any backend (e.g. HDF5 or Zarr) if there is an IO class available.

Advanced features requiring direct use of IO classes (e.g. NWBHDF5IO, NWBZarrIO) include:
* Streaming data from s3
* Custom namespace extensions
* Parallel I/O with MPI
* Custom build managers
* Write or append modes
* Pre-opened HDF5 file objects or Zarr stores
* Remote file access configuration

Example usage reading a local NWB file:
Example usage:

.. code-block:: python

from pynwb import read_nwb
nwbfile = read_nwb("path/to/file.nwb")
nwbfile = read_nwb("s3://bucket/file.nwb")

:Returns: pynwb.NWBFile The loaded NWB file object.
"""

path = popargs('path', kwargs)
path_str = str(path)
is_remote = path_str.startswith(_REMOTE_SCHEMES)

# Remote URL: dispatch by URL shape without probing (can_read cannot reach remote paths)
if is_remote:
path_str = path_str.rstrip("/")
has_zarr_suffix = path_str.endswith(".zarr")
# DANDI publishes some Zarr assets at `<scheme>://dandiarchive/zarr/<uuid>/` with
# no `.zarr` suffix, so suffix matching alone misses them.
is_dandi_zarr = "dandiarchive" in path_str and "/zarr/" in path_str
if has_zarr_suffix or is_dandi_zarr:
from hdmf_zarr import NWBZarrIO
return NWBZarrIO.read_nwb(path=path)
return NWBHDF5IO.read_nwb(path=path)

# HDF5 is always available so we try that first
backend_is_hdf5 = NWBHDF5IO.can_read(path=path)
if backend_is_hdf5:
Expand Down
54 changes: 50 additions & 4 deletions tests/integration/io/test_read.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pathlib import Path
import tempfile
import urllib.request

from pynwb import read_nwb
from pynwb.testing.mock.file import mock_NWBFile
Expand All @@ -8,10 +9,16 @@
import unittest
try:
from hdmf_zarr import NWBZarrIO # noqa f401
HAVE_NWBZarrIO = True
HAVE_NWBZarrIO = True
except ImportError:
HAVE_NWBZarrIO = False

try:
import fsspec # noqa: F401
HAVE_FSSPEC = True
except ImportError:
HAVE_FSSPEC = False


class TestReadNWBMethod(TestCase):
"""Test suite for the read_nwb function."""
Expand Down Expand Up @@ -67,11 +74,50 @@ def test_read_invalid_file(self):
with tempfile.TemporaryDirectory() as temp_dir:
path = Path(temp_dir) / "test.txt"
path.write_text("Not an NWB file")

expected_message = (
f"Unable to read file: '{path}'. The file is not recognized as either a valid HDF5 or Zarr NWB file. "
"Please ensure the file exists and contains valid NWB data."
)

with self.assertRaisesWith(ValueError, expected_message):
read_nwb(path=path)
read_nwb(path=path)

@unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed")
def test_read_nwb_anonymous_remote_hdf5(self):
    """Test reading an anonymous public HDF5 NWB file over HTTPS through fsspec."""
    url = (
        "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/"
        "11ec8933-1456-4942-922b-94e5878bb991"
    )
    # Probe connectivity first so an offline environment skips instead of failing.
    # The response is closed immediately; only reachability matters here.
    # OSError is caught because it is the common base of URLError/HTTPError and of
    # the raw timeout / connection-reset errors urlopen can raise while reading
    # the response, which are not wrapped in URLError.
    try:
        with urllib.request.urlopen(url, timeout=2):
            pass
    except OSError:
        self.skipTest("Internet access to DANDI failed.")

    nwbfile = read_nwb(path=url)
    try:
        self.assertEqual(len(nwbfile.acquisition['TestData'].data[:]), 3)
    finally:
        # Release the remote file handle even when the assertion fails.
        nwbfile.get_read_io().close()

@unittest.skipIf(not HAVE_NWBZarrIO or not HAVE_FSSPEC, "hdmf-zarr or fsspec not installed")
def test_read_nwb_anonymous_remote_zarr(self):
    """Test reading an anonymous public Zarr NWB file from DANDI through fsspec.

    Uses the same DANDI 000719 file as hdmf-zarr's own S3 streaming tutorial (PR #330).
    Depends on hdmf-zarr's `resolve_ref` self-reference fix
    (https://github.com/hdmf-dev/hdmf-zarr/pull/348); without that fix this read
    fails with `PathNotFoundError: nothing found at path ''`.
    """
    url = (
        "https://dandiarchive.s3.amazonaws.com/zarr/"
        "c8c6b848-fbc6-4f58-85ff-e3f2618ee983/"
    )
    # Probe the store's consolidated metadata so an offline environment skips
    # instead of failing. The response is closed immediately.
    # OSError is caught because it is the common base of URLError/HTTPError and of
    # the raw timeout / connection-reset errors urlopen can raise while reading
    # the response, which are not wrapped in URLError.
    try:
        with urllib.request.urlopen(url + ".zmetadata", timeout=2):
            pass
    except OSError:
        self.skipTest("Internet access to DANDI failed.")

    nwbfile = read_nwb(path=url)
    try:
        self.assertEqual(nwbfile.identifier, "7208f856-f527-479f-973d-e6e72326a8ea")
        self.assertEqual(nwbfile.subject.subject_id, "R6")
    finally:
        # Release the remote store even when an assertion fails.
        nwbfile.get_read_io().close()
Loading