NeurodataWithoutBorders · rly · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026
diff --git a/.github/workflows/run_dandi_read_tests.yml b/.github/workflows/run_dandi_read_tests.yml
@@ -1,10 +1,7 @@
 name: Run DANDI read tests
 on:
-  # NOTE this is disabled until we can run this systematically instead of randomly
-  # so we don't get constant error notifications and waste compute cycles
-  # See https://github.com/NeurodataWithoutBorders/pynwb/issues/1804
-  # schedule:
-  #   - cron: '0 6 * * *'  # once per day at 1am ET
+  schedule:
+    - cron: '0 6 * * *'  # once per day at 06:00 UTC (1am EST / 2am EDT)
   workflow_dispatch:
 
 concurrency:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## PyNWB 4.0.0 (Upcoming)
 
 ### Documentation and tutorial enhancements
+- Added a tutorial on using HERD to annotate an NWB file with external resources and store it at `/general/external_resources`, plus a companion example showing how to annotate multiple NWB files streamed from a DANDI dandiset with a single HERD. @rly, @mavaylon1 [#2200](https://github.com/NeurodataWithoutBorders/pynwb/pull/2200)
 - Added `pandas.ExtensionArray` to `nitpick_ignore` so the Sphinx build does not fail on the unresolved cross-reference that HDMF's `array_data` docval macro renders for every type that accepts array data. @rly [#2209](https://github.com/NeurodataWithoutBorders/pynwb/pull/2209)
 - Added `app.readthedocs.org/projects/pynwb/*` to `linkcheck_ignore` to stop the Sphinx linkcheck CI job from intermittently failing when GitHub Actions runners get throttled by readthedocs. @h-mayorquin [#2191](https://github.com/NeurodataWithoutBorders/pynwb/pull/2191)
 - Added documentation for `ExternalImage` to the images tutorial. @h-mayorquin [#2159](https://github.com/NeurodataWithoutBorders/pynwb/pull/2159)

diff --git a/docs/gallery/general/plot_external_resources.py b/docs/gallery/general/plot_external_resources.py
@@ -0,0 +1,177 @@
+"""
+.. _external_resources:
+
+Linking to External Resources (HERD)
+====================================
+
+The :py:class:`~pynwb.resources.HERD` (HDMF External Resources Data Structure) class lets you map
+terms used in your data to entities defined in external, web-accessible resources such as
+ontologies. For example, you may store a species name ``"Mus musculus"`` on a
+:py:class:`~pynwb.file.Subject` and want to link it to the corresponding NCBI Taxonomy term so that
+the value is standardized and easy to query.
+
+From a user's perspective, a HERD can be treated as a single table that associates a ``key`` (a term
+used on an ``object``, i.e. a dataset or attribute in the file) with an ``entity`` (a term in an
+external resource, identified by an ``entity_id`` and an ``entity_uri``). Internally, HERD stores
+this in six interlinked tables (``keys``, ``files``, ``entities``, ``entity_keys``, ``objects``, and
+``object_keys``) and provides convenience methods so you rarely need to interact with those tables
+directly.
+
+This tutorial shows how to create a HERD, annotate objects in an NWB file, store the HERD in the
+file, and inspect the annotations after reading the file back. For the full HERD API (including
+``add_ref_termset`` for validating terms against a :py:class:`~hdmf.term_set.TermSet`, ``get_key``,
+and compound-data references), see the
+`HDMF HERD tutorial <https://hdmf.readthedocs.io/en/stable/tutorials/plot_external_resources.html>`_.
+"""
+
+# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnails_external_resources.png'
+from datetime import datetime
+from uuid import uuid4
+
+from dateutil.tz import tzlocal
+
+from pynwb import NWBHDF5IO, NWBFile
+from pynwb.file import Subject
+from pynwb.resources import HERD
+
+###############################################################################
+# Create an NWB file
+# ------------------
+# Start with an :py:class:`~pynwb.file.NWBFile` that has a :py:class:`~pynwb.file.Subject`. The
+# subject's species is the value we will annotate with an external resource.
+
+nwbfile = NWBFile(
+    session_description="a demonstration of external resources",
+    identifier=str(uuid4()),
+    session_start_time=datetime(2018, 4, 25, 2, 30, 3, tzinfo=tzlocal()),
+    subject=Subject(subject_id="001", species="Mus musculus"),
+)
+
+###############################################################################
+# Create a HERD and attach it to the file
+# ---------------------------------------
+# Create a :py:class:`~pynwb.resources.HERD` and assign it to the ``external_resources`` field of the
+# :py:class:`~pynwb.file.NWBFile`.
+
+nwbfile.external_resources = HERD()
+
+###############################################################################
+# Add references with ``add_ref``
+# -------------------------------
+# Use :py:meth:`~hdmf.common.resources.HERD.add_ref` to add a row that links a key on an object to an
+# external entity. Here we link the subject's species to the NCBI Taxonomy entry for *Mus musculus*.
+# The subject must be part of a file before a reference is added to it.
+#
+# An entity is identified by an ``entity_id`` and an ``entity_uri``. The ``entity_id`` is a compact
+# URI (CURIE) of the form ``prefix:identifier`` whose prefix is registered with
+# `bioregistry.io <https://bioregistry.io/>`_, such as ``NCBITaxon`` for the NCBI Taxonomy. The
+# ``entity_uri`` is the persistent URL the CURIE resolves to, which you can look up at
+# ``https://bioregistry.io/<entity_id>``.
+
+nwbfile.external_resources.add_ref(
+    container=nwbfile.subject,
+    key=nwbfile.subject.species,
+    entity_id="NCBITaxon:10090",
+    entity_uri="http://purl.obolibrary.org/obo/NCBITaxon_10090",
+)
+
+###############################################################################
+# References can also point to an attribute of an object, such as a column of a table. Here we record
+# the brain region of a set of electrodes in the electrodes table and link the region to the
+# corresponding structure in the
+# `Allen Mouse Brain Atlas <https://atlas.brain-map.org/>`_. When the target is a column, pass the
+# table as the ``container`` and the column name as the ``attribute``; HERD resolves the reference to
+# the column object itself.
+
+device = nwbfile.create_device(name="probe")
+electrode_group = nwbfile.create_electrode_group(
+    name="shank0",
+    description="a shank of the recording probe",
+    location="VISp",
+    device=device,
+)
+for _ in range(4):
+    nwbfile.add_electrode(location="VISp", group=electrode_group)
+
+nwbfile.external_resources.add_ref(
+    container=nwbfile.electrodes,
+    attribute="location",
+    key="VISp",
+    entity_id="MBA:385",
+    entity_uri="https://purl.brain-bican.org/ontology/mbao/MBA_385",
+)
+
+###############################################################################
+# Inspect the HERD
+# ----------------
+# :py:meth:`~hdmf.common.resources.HERD.to_dataframe` flattens the interlinked tables into a single
+# :py:class:`~pandas.DataFrame`, with one row per (object, key, entity) association.
+
+nwbfile.external_resources.to_dataframe()
+
+###############################################################################
+# You can also view the individual tables. Each is a
+# :py:class:`~hdmf.common.table.DynamicTable` and has its own ``to_dataframe`` method.
+
+nwbfile.external_resources.keys.to_dataframe()
+
+###############################################################################
+
+nwbfile.external_resources.entities.to_dataframe()
+
+###############################################################################
+# :py:meth:`~hdmf.common.resources.HERD.get_object_type` returns all annotations for objects of a
+# given type, for example every annotated :py:class:`~pynwb.file.Subject`.
+
+nwbfile.external_resources.get_object_type(object_type="Subject")
+
+###############################################################################
+# Write and read the NWB file
+# ---------------------------
+# Writing the file stores the HERD inside it. Reading the file back makes the HERD available again
+# through the ``external_resources`` field.
+
+filename = "external_resources_tutorial.nwb"
+with NWBHDF5IO(filename, mode="w") as io:
+    io.write(nwbfile)
+
+read_io = NWBHDF5IO(filename, mode="r")
+read_nwbfile = read_io.read()
+read_herd = read_nwbfile.external_resources
+
+###############################################################################
+# Access the loaded data
+# -----------------------
+# The loaded HERD provides the same accessors as before. In a Jupyter notebook, displaying the HERD
+# renders the flattened references as a table, and
+# :py:meth:`~hdmf.common.resources.HERD.to_dataframe` returns that same table as a
+# :py:class:`~pandas.DataFrame`. The individual tables give a more focused view.
+
+read_herd.to_dataframe()
+
+###############################################################################
+# View the individual tables, for example:
+
+read_herd.keys.to_dataframe()
+
+###############################################################################
+# :py:meth:`~hdmf.common.resources.HERD.get_object_entities` returns the entities annotated on a
+# single object as a :py:class:`~pandas.DataFrame`. Here we view the species annotation stored for
+# the subject:
+
+read_herd.get_object_entities(container=read_nwbfile.subject)
+
+###############################################################################
+# Close the file once you are done reading from it.
+
+read_io.close()
+
+###############################################################################
+# Alternative: store a HERD outside an NWB file
+# ---------------------------------------------
+# A HERD can also be saved independently of an NWB file as a zip archive of the underlying tables
+# using :py:meth:`~hdmf.common.resources.HERD.to_zip`, and read back with
+# :py:meth:`~hdmf.common.resources.HERD.from_zip`. This is useful when external resources span
+# multiple files; see :ref:`external_resources_streaming` for an example that annotates many NWB
+# files with a single HERD. For the full HERD API, see the
+# `HDMF HERD tutorial <https://hdmf.readthedocs.io/en/stable/tutorials/plot_external_resources.html>`_.
diff --git a/docs/gallery/general/resources_streaming.py b/docs/gallery/general/resources_streaming.py
@@ -0,0 +1,154 @@
+"""
+.. _external_resources_streaming:
+
+Annotating Multiple Streamed NWB Files with a Single HERD
+=========================================================
+
+A single :py:class:`~pynwb.resources.HERD` can hold external resource references for many
+:py:class:`~pynwb.file.NWBFile` objects at once. This makes it possible to build a shared set of
+ontology annotations across an entire dataset, for example every file in a
+`DANDI <https://dandiarchive.org/>`_ dandiset.
+
+This example streams each NWB file in a dandiset directly from the DANDI Archive (without
+downloading the full files) and adds references for two pieces of metadata in each file: the
+subject species (mapped to the `NCBI Taxonomy <https://www.ncbi.nlm.nih.gov/taxonomy>`_) and the
+experimenter (mapped to an `ORCID <https://orcid.org/>`_ iD). Because a HERD can be saved
+independently of any one file with :py:meth:`~hdmf.common.resources.HERD.to_zip`, the resulting
+HERD can be distributed alongside the dandiset as a standalone annotation layer and later reloaded
+with :py:meth:`~hdmf.common.resources.HERD.from_zip` to add further annotations.
+
+For storing a HERD inside a single NWB file, see :ref:`external_resources`.
+
+.. note::
+
+   This example reads data over the network and is not run when the documentation is built. To run
+   it yourself, install the packages it needs in addition to PyNWB:
+
+   .. code-block:: bash
+
+      pip install dandi fsspec aiohttp requests tqdm
+"""
+
+# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnails_streaming_external_resources.png'
+import h5py
+from dandi.dandiapi import DandiAPIClient
+from fsspec import filesystem
+from fsspec.implementations.cached import CachingFileSystem
+from tqdm import tqdm
+
+from pynwb import NWBHDF5IO
+from pynwb.resources import HERD
+
+###############################################################################
+# Collect the file URLs from DANDI
+# --------------------------------
+# Use the :py:class:`~dandi.dandiapi.DandiAPIClient` to list the S3 URL of every NWB file in a
+# dandiset. Here we use dandiset `000015 <https://dandiarchive.org/dandiset/000015>`_.
+
+dandiset_id = "000015"
+with DandiAPIClient() as client:
+    dandiset = client.get_dandiset(dandiset_id, "draft")
+    urls = [  # keep only NWB assets; anything else cannot be opened with NWBHDF5IO
+        asset.get_content_url(follow_redirects=1, strip_query=True)
+        for asset in dandiset.get_assets() if asset.path.endswith(".nwb")
+    ]
+
+###############################################################################
+# Set up streaming
+# ----------------
+# Create an HTTP filesystem with a local cache so repeated reads do not re-download data.
+
+fs = CachingFileSystem(fs=filesystem("http"), cache_storage="nwb-cache")
+
+###############################################################################
+# Populate a single HERD across all files
+# ---------------------------------------
+# Open each file in read mode and add references for its subject species and experimenter. Checking
+# the value read from each file before annotating it keeps a file with unexpected metadata from being
+# mislabeled. Passing the same ``entity_id`` across files reuses the existing entity instead of
+# creating a duplicate.
+#
+# Each entity is identified by an ``entity_id``, a compact URI (CURIE) whose prefix is registered with
+# `bioregistry.io <https://bioregistry.io/>`_, and an ``entity_uri``, the persistent URL the CURIE
+# resolves to.
+
+herd = HERD()
+for url in tqdm(urls):
+    with fs.open(url, "rb") as f, h5py.File(f) as h5_file:
+        with NWBHDF5IO(file=h5_file) as io:
+            read_nwbfile = io.read()
+
+            # reference the subject species; None when the file has no subject
+            species = getattr(read_nwbfile.subject, "species", None)
+            if species == "Mus musculus":
+                herd.add_ref(
+                    container=read_nwbfile.subject,
+                    key=species,
+                    entity_id="NCBITaxon:10090",
+                    entity_uri="http://purl.obolibrary.org/obo/NCBITaxon_10090",
+                )
+            else:
+                print(f"Unexpected species: {species}")
+
+            # reference the first experimenter (None if unset), an NWBFile attribute
+            experimenter = (read_nwbfile.experimenter or [None])[0]
+            if experimenter == "Chen, Tsai-Wen":
+                herd.add_ref(
+                    container=read_nwbfile,
+                    attribute="experimenter",
+                    key=experimenter,
+                    entity_id="ORCID:0000-0001-6782-3819",
+                    entity_uri="https://orcid.org/0000-0001-6782-3819",
+                )
+            else:
+                print(f"Unexpected experimenter: {experimenter}")
+
+###############################################################################
+# Inspect and save the combined HERD
+# ----------------------------------
+# The flattened table now contains one row per (file, object, key, entity) association across all of
+# the streamed files. Save the HERD as a standalone zip archive that can be shared alongside the
+# dandiset.
+
+herd.to_zip(path="./dandiset_resources.zip")
+herd.to_dataframe()  # kept as the cell's last expression so the table is displayed
+
+###############################################################################
+# Load an external HERD to annotate a file
+# ----------------------------------------
+# A HERD saved to a zip archive can be loaded later with
+# :py:meth:`~hdmf.common.resources.HERD.from_zip` and used to add further annotations. Here we load
+# the HERD we just saved, stream one of the files again, and annotate its institution with the
+# corresponding `Research Organization Registry (ROR) <https://ror.org/>`_ identifier.
+
+loaded_herd = HERD.from_zip(path="./dandiset_resources.zip")
+
+with fs.open(urls[0], "rb") as f, h5py.File(f) as h5_file:
+    with NWBHDF5IO(file=h5_file) as io:
+        read_nwbfile = io.read()
+        institution = read_nwbfile.institution
+        if institution == "Janelia Research Campus":
+            loaded_herd.add_ref(
+                container=read_nwbfile,
+                attribute="institution",
+                key=institution,
+                entity_id="ROR:013sk6x84",
+                entity_uri="https://ror.org/013sk6x84",
+            )
+        else:
+            print(f"Unexpected institution: {institution}")
+
+loaded_herd.to_dataframe()
+
+###############################################################################
+# To view the annotations for a single object, use
+# :py:meth:`~hdmf.common.resources.HERD.get_object_entities`. Here we view the species annotation
+# stored for the subject of the file we just streamed:
+
+loaded_herd.get_object_entities(container=read_nwbfile.subject)
+
+###############################################################################
+# Save the updated HERD as a new zip archive so the added institution annotation is persisted
+# alongside the original references.
+
+loaded_herd.to_zip(path="./dandiset_resources_updated.zip")
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -77,7 +77,18 @@ class CustomSphinxGallerySectionSortKey(ExampleTitleSortKey):
     # listed here will be added in alphabetical order based on title after the
     # explicitly listed galleries
     GALLERY_ORDER = {
-        'general': ['plot_file.py'],
+        'general': [
+            "plot_file.py",
+            "add_remove_containers.py",
+            "plot_timeintervals.py",
+            "scratch.py",
+            "extensions.py",
+            "plot_configurator.py",
+            "object_id.py",
+            "plot_read_basics.py",
+            "plot_external_resources.py",
+            "resources_streaming.py",
+        ],
         # Sort domain-specific tutorials based on domain to group tutorials belonging to the same domain
         'domain': [
             "ecephys.py",

diff --git a/docs/source/figures/gallery_thumbnails.pptx b/docs/source/figures/gallery_thumbnails.pptx
diff --git a/docs/source/figures/gallery_thumbnails_external_resources.png b/docs/source/figures/gallery_thumbnails_external_resources.png
diff --git a/docs/source/figures/gallery_thumbnails_streaming_external_resources.png b/docs/source/figures/gallery_thumbnails_streaming_external_resources.png