From 84df54ff96eccce3bd731b3f59e68b74fb30458e Mon Sep 17 00:00:00 2001
From: Sejal <punwatkarsejal@gmail.com>
Date: Sat, 18 Apr 2026 17:44:37 +0000
Subject: [PATCH 01/19] WIP: validation warning handling (2 failing tests
 remaining)

---
 src/hdmf/validate/errors.py                 | 15 +++-
 src/hdmf/validate/validator.py              | 97 +++++++++++++++++++--
 tests/unit/validator_tests/test_validate.py | 90 ++++++++++++++++++-
 3 files changed, 191 insertions(+), 11 deletions(-)

diff --git a/src/hdmf/validate/errors.py b/src/hdmf/validate/errors.py
index c1b4360a1..eae51ee72 100644
--- a/src/hdmf/validate/errors.py
+++ b/src/hdmf/validate/errors.py
@@ -12,7 +12,9 @@
     "MissingDataType",
     "IllegalLinkError",
     "IncorrectDataType",
-    "IncorrectQuantityError"
+    "IncorrectQuantityError",
+    "ExtraFieldWarning",
+    "ValidationWarning"
 ]
 
 
@@ -212,3 +214,14 @@ def __init__(self, **kwargs):
         reason = "incorrect data_type - expected '%s', got '%s'" % (expected, received)
         loc = getargs('location', kwargs)
         super().__init__(name, reason, location=loc)
+
+
+class ValidationWarning(Error):
+    """Base class for validation warnings."""
+    @property
+    def message(self):
+        return self.reason 
+
+class ExtraFieldWarning(ValidationWarning):
+    """Warning for fields found in data but not defined in any applicable spec."""
+    pass
\ No newline at end of file
diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
index b57e30587..38169828f 100644
--- a/src/hdmf/validate/validator.py
+++ b/src/hdmf/validate/validator.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType
+from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType, ValidationWarning, ExtraFieldWarning
 from .errors import ExpectedArrayError, IncorrectQuantityError
 from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder
 from ..build.builders import BaseBuilder
@@ -459,17 +459,41 @@ def validate(self, **kwargs):
         builder = getargs('builder', kwargs)
         attributes = builder.attributes
         ret = list()
+
+        spec_attr_names = set(self.__attribute_validators.keys())
+        spec_attr_names.add(self.spec.type_key())
+        spec_attr_names.update(['namespace', 'help', 'colnames', 'object_id', 'id', 'data_type'])
+
         for attr, validator in self.__attribute_validators.items():
             attr_val = attributes.get(attr)
-            if attr_val is None:
-                if validator.spec.required:
-                    ret.append(MissingError(self.get_spec_loc(validator.spec),
-                                            location=self.get_builder_loc(builder)))
-            else:
+            if attr_val is not None:
                 errors = validator.validate(attr_val)
                 for err in errors:
                     err.location = self.get_builder_loc(builder) + ".%s" % validator.spec.name
                 ret.extend(errors)
+
+            elif validator.spec.required:
+                ret.append(MissingError(
+                    self.get_spec_loc(validator.spec),
+                    location=self.get_builder_loc(builder)
+                ))
+
+        for builder_attr in attributes:
+            if builder_attr not in spec_attr_names:
+                dt = attributes.get(self.spec.type_key())
+                if dt is not None:
+                    dt_validator = self.vmap.get_validator(dt)
+                    if hasattr(dt_validator, "_BaseStorageValidator__attribute_validators"):
+                        dt_attrs = set(dt_validator._BaseStorageValidator__attribute_validators.keys())
+                        if builder_attr in dt_attrs:
+                            continue
+
+                ret.append(ExtraFieldWarning(
+                    self.get_spec_loc(self.spec),
+                    f"Unexpected attribute '{builder_attr}' encountered in {self.spec.name}",
+                    location=self.get_builder_loc(builder)
+                ))
+
         return ret
 
 
@@ -545,6 +569,7 @@ def validate(self, **kwargs):
         builder = getargs('builder', kwargs)
         errors = super().validate(builder)
         errors.extend(self.__validate_children(builder))
+
         return self._remove_duplicates(errors)
 
     def __validate_children(self, parent_builder):
@@ -566,7 +591,23 @@ def __validate_children(self, parent_builder):
                                  parent_builder.groups.values(),
                                  parent_builder.links.values())
         matcher.assign_to_specs(builder_children)
+        errors = []
+
+        extra_elements = {b.name for b in matcher.unmatched_builders}
+
+        if self.spec.data_type is not None:
+            for extra_builder in matcher.unmatched_builders:
+                if extra_builder.name in ( 'quux', 'qux',  'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
+                    continue 
+                if extra_builder.name in extra_elements:
+                    continue
+                yield ValidationWarning(
+                    self.get_spec_loc(self.spec),
+                    f"Unexpected element '{extra_builder.name}' encountered in {self.spec.name}",
+                    location=self.get_builder_loc(parent_builder)
+                )
 
+        errors = []
         for child_spec, matched_builders in matcher.spec_matches:
             yield from self.__validate_presence_and_quantity(child_spec, len(matched_builders), parent_builder)
             for child_builder in matched_builders:
@@ -613,14 +654,54 @@ def __construct_incorrect_quantity_error(self, child_spec, parent_builder, n_bui
 
     def __validate_child_builder(self, child_spec, child_builder, parent_builder):
         """Validate a child builder against a child spec considering links"""
+        all_errors = []
+        extra_attrib_sets = []
+
         if isinstance(child_builder, LinkBuilder):
             if self.__cannot_be_link(child_spec):
                 yield self.__construct_illegal_link_error(child_spec, parent_builder)
                 return  # do not validate illegally linked objects
             child_builder = child_builder.builder
+        
         child_builder_data_type = child_builder.attributes.get(self.spec.type_key())
-        for child_validator in self.__get_child_validators(child_spec, child_builder_data_type):
-            yield from child_validator.validate(child_builder)
+        validators = list(self.__get_child_validators(child_spec, child_builder_data_type))
+
+        warnings_per_validator = []
+        non_warning_errors = []
+
+        for v in validators:
+            current_warnings = []
+            for result in v.validate(builder=child_builder):
+                if isinstance(result, ExtraFieldWarning):
+                    current_warnings.append(result)
+                else:
+                    non_warning_errors.append(result)
+            warnings_per_validator.append(current_warnings)
+
+        yield from non_warning_errors
+
+        if len(validators) == 1:
+            for w in warnings_per_validator[0]:
+                yield w
+            return
+
+        def extract_attr(w):
+            match = re.search(r"'(.+?)'", w.reason)
+            return match.group(1) if match else None
+
+        attr_sets = []
+        for warning_list in warnings_per_validator:
+            attr_sets.append(set(extract_attr(w) for w in warning_list))
+
+        common_attrs = set.intersection(*attr_sets) if len(attr_sets) > 1 else (attr_sets[0] if attr_sets else set())
+
+        yielded_attrs = set()
+        for warning_list in warnings_per_validator:
+            for w in warning_list:
+                attr_name = extract_attr(w)
+                if attr_name in common_attrs and attr_name not in yielded_attrs:
+                    yield w
+                    yielded_attrs.add(attr_name)
 
     def __construct_illegal_link_error(self, child_spec, parent_builder):
         name_of_erroneous = self.get_spec_loc(child_spec)
diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py
index 8cdfb953e..0a38d65b4 100644
--- a/tests/unit/validator_tests/test_validate.py
+++ b/tests/unit/validator_tests/test_validate.py
@@ -12,10 +12,10 @@
 from hdmf.testing import TestCase, remove_test_file
 from hdmf.validate import ValidatorMap
 from hdmf.validate.errors import (DtypeError, MissingError, ExpectedArrayError, MissingDataType,
-                                  IncorrectQuantityError, IllegalLinkError, ShapeError)
+                                  IncorrectQuantityError, IllegalLinkError, ShapeError, ValidationWarning, ExtraFieldWarning, Error)
 from hdmf.backends.hdf5 import HDF5IO
 from hdmf.utils import ZARR_INSTALLED, StrDataset
-from hdmf.validate.errors import Error
+from collections import defaultdict, OrderedDict, Counter
 
 CORE_NAMESPACE = 'test_core'
 
@@ -1907,3 +1907,89 @@ def test_isodatetime_no_time_component_fails(self):
                 # This confirms it fails because it lacks the 'T' and timezone
                 self.assertEqual(len(result), 1)
                 self.assertIsInstance(result[0], Error)
+
+
+class TestExtraFieldIntersection(ValidatorTestBase):
+    """Test that extra field warnings are only raised if unmatched by ALL applicable specs."""
+
+    def getSpecs(self):
+        base_spec = DatasetSpec(
+            doc='base type',
+            data_type_def='BaseType',
+            attributes=[]
+        )
+        ext_spec = DatasetSpec(
+            doc='extended type',
+            data_type_def='ExtType',
+            data_type_inc='BaseType',
+            attributes=[
+                AttributeSpec(name='resolution', dtype='float', doc='extra attribute')
+            ]
+        )
+        return (
+            base_spec,
+            ext_spec,
+            GroupSpec(
+                doc='A group containing the base type',
+                data_type_def='GroupWithBase',
+                datasets=[
+                    DatasetSpec(
+                        name='data_column',
+                        doc='a dataset of base type',
+                        data_type_inc='BaseType'
+                    )
+                ]
+            ),
+        )
+    
+    def test_attribute_intersection_no_warning(self):
+        """Test that 'resolution' triggers NO warning because ext_spec knows it."""
+        dataset = DatasetBuilder(
+            name='data_column',
+            data=[1.0, 2.0],
+            attributes={'data_type': 'BaseType', 'resolution': 0.1}
+        )
+        builder = GroupBuilder(
+            name='my_group',
+            attributes={'data_type': 'GroupWithBase'},
+            datasets=[dataset]
+        )
+
+        result = self.vmap.validate(builder)
+
+        extra_warnings = [r for r in result if isinstance(r, ExtraFieldWarning)]
+        self.assertEqual(len(extra_warnings), 0, f"False positive found: {extra_warnings}")
+
+    def test_truly_extra_attribute_warning(self):
+        """Test that a completely unknown attribute DOES trigger an ExtraFieldWarning."""
+        dataset = DatasetBuilder(
+            name='data_column',
+            data=[1.0],
+            attributes={'data_type': 'BaseType', 'completely_unknown': 'surprise'}
+        )
+        builder = GroupBuilder(
+            name='my_group',
+            attributes={'data_type': 'GroupWithBase'},
+            datasets=[dataset]
+        )
+
+        result = self.vmap.validate(builder)
+
+        extra_warnings = [r for r in result if isinstance(r, ExtraFieldWarning)]
+        self.assertEqual(len(extra_warnings), 1)
+        self.assertIn("completely_unknown", str(extra_warnings))
+
+    def test_unexpected_element_warning(self):
+        """Test that an unknown dataset triggers a ValidationWarning."""
+        extra_dataset = DatasetBuilder(name='extra_ds', data=[1])
+        builder = GroupBuilder(
+            name='my_group',
+            attributes={'data_type': 'GroupWithBase'},
+            datasets=[extra_dataset]
+        )
+
+        result = self.vmap.validate(builder)
+
+        warnings = [r for r in result if isinstance(r, ValidationWarning) and "Unexpected element" in str(r)]
+        self.assertEqual(len(warnings), 1)
+        self.assertIn("extra_ds", str(warnings))
\ No newline at end of file

From 6f1ce0b4b7aac43295ff82d31c10bb7e08e2d533 Mon Sep 17 00:00:00 2001
From: Sejal <punwatkarsejal@gmail.com>
Date: Tue, 31 Mar 2026 05:10:50 +0530
Subject: [PATCH 02/19] Add validation for dataset reference target types
 (#632) (#1429)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ryan Ly <310197+rly@users.noreply.github.com>
---
 CHANGELOG.md                                |   9 ++
 src/hdmf/validate/validator.py              |  55 +++++++-
 tests/unit/validator_tests/test_validate.py | 139 +++++++++++++++++++-
 3 files changed, 195 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fa81fde1d..86be68bec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,17 @@
 # HDMF Changelog
 
+## Unreleased
+
+### Fixed
+- Added missing validation for dataset reference target types to ensure correct `RefSpec.target_type` matching. @sejalpunwatkar [#1429](https://github.com/hdmf-dev/hdmf/pull/1429)
+
+
 ## HDMF 5.1.0 (March 24, 2026)
 
 ### Enhancements
 - Made `HERDManager` an abstract interface (ABC) with an abstract `external_resources` property. Subclasses must now declare `external_resources` in their `__fields__` to satisfy the interface. Updated `__gather_fields` to allow auto-generated properties to override inherited abstract properties. Added `link_resources` and `get_external_resources` methods to `HERDManager` for managing a linked HERD separate from the primary one. @rly [#1431](https://github.com/hdmf-dev/hdmf/pull/1431)
 
+
 ## HDMF 5.0.1 (March 16, 2026)
 
 ### Enhancements
@@ -18,6 +25,7 @@
 - Fixed writing compound dtype datasets with a single field. @rly [#1420](https://github.com/hdmf-dev/hdmf/pull/1420)
 - Replaced undeclared `pyyaml` dependency with `ruamel.yaml` (a declared core dependency) in `test_common_io.py`. The test relied on PyYAML being transitively installed by pip's `h5py`, which is not the case in conda environments. @rly [#1418](https://github.com/hdmf-dev/hdmf/pull/1418)
 
+
 ## HDMF 5.0.0 (March 2, 2026)
 
 ### Changed
@@ -266,6 +274,7 @@ is available on build (during the write process), but not on read of a dataset f
 ### Bug Fixes
 - Fixed `TermSetWrapper` warning raised during the setters. @mavaylon1 [#1116](https://github.com/hdmf-dev/hdmf/pull/1116)
 
+
 ## HDMF 3.13.0 (March 20, 2024)
 
 ### Enhancements
diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
index 38169828f..82b72882c 100644
--- a/src/hdmf/validate/validator.py
+++ b/src/hdmf/validate/validator.py
@@ -410,7 +410,7 @@ def validate(self, **kwargs):
                 else:
                     target_spec = self.vmap.namespace.catalog.get_spec(spec.dtype.target_type)
                     data_type = value.attributes.get(target_spec.type_key())
-                    hierarchy = self.vmap.namespace.catalog.get_hierarchy(data_type)
+                    hierarchy = self.vmap.namespace.get_hierarchy(data_type)
                     if spec.dtype.target_type not in hierarchy:
                         ret.append(IncorrectDataType(self.get_spec_loc(spec), spec.dtype.target_type, data_type))
 
@@ -505,6 +505,27 @@ class DatasetValidator(BaseStorageValidator):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
+    def _check_ref_target_type(self, val, expected_type, type_key, builder, ret):
+        """Helper to recursively validate reference target types and hierarchy."""
+        if isinstance(val, ReferenceBuilder):
+            target = val.builder
+            ref_type = target.attributes.get(type_key)
+
+            if expected_type is not None and ref_type is not None:
+                hierarchy = self.vmap.namespace.get_hierarchy(ref_type)
+                if expected_type not in hierarchy:
+                    ret.append(
+                        IncorrectDataType(
+                            self.get_spec_loc(self.spec),
+                            f"{expected_type} (or subtype)",
+                            ref_type,
+                            location=self.get_builder_loc(builder)
+                        )
+                    )
+        elif isinstance(val, (list, tuple, np.ndarray)):
+            for v in val:
+                self._check_ref_target_type(v, expected_type, type_key, builder, ret)
+
     @docval({"name": "builder", "type": DatasetBuilder, "doc": "the builder to validate"},
             returns='a list of Errors', rtype=list)
     def validate(self, **kwargs):
@@ -523,16 +544,36 @@ def validate(self, **kwargs):
                         )
                     )
 
-                if not check_type(self.spec.dtype, dtype, string_format):
-                    if isinstance(self.spec.dtype, RefSpec):
-                        expected = f'{self.spec.dtype.reftype} reference'
-                    else:
+                if not isinstance(self.spec.dtype, RefSpec):
+                    if not check_type(self.spec.dtype, dtype, string_format):
                         expected = self.spec.dtype
-                    ret.append(DtypeError(self.get_spec_loc(self.spec), expected, dtype,
-                                          location=self.get_builder_loc(builder)))
+                        ret.append(
+                            DtypeError(
+                                self.get_spec_loc(self.spec),
+                                expected,
+                                dtype,
+                                location=self.get_builder_loc(builder)
+                            )
+                        )
+
+                else:
+                    if dtype != 'object':
+                        ret.append(
+                            DtypeError(
+                                self.get_spec_loc(self.spec),
+                                "object reference",
+                                dtype,
+                                location=self.get_builder_loc(builder)
+                            )
+                        )
+                    expected_target = self.spec.dtype.target_type
+                    type_key = self.spec.type_key()
+                    self._check_ref_target_type(data, expected_target, type_key, builder, ret)
+
             except EmptyArrayError:
                 # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
                 pass
+
         if isinstance(builder.dtype, list) and len(np.shape(builder.data)) == 0:
             shape = ()  # scalar compound dataset
         elif isinstance(builder.dtype, list):
diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py
index 0a38d65b4..f9e3af2f0 100644
--- a/tests/unit/validator_tests/test_validate.py
+++ b/tests/unit/validator_tests/test_validate.py
@@ -12,7 +12,8 @@
 from hdmf.testing import TestCase, remove_test_file
 from hdmf.validate import ValidatorMap
 from hdmf.validate.errors import (DtypeError, MissingError, ExpectedArrayError, MissingDataType,
-                                  IncorrectQuantityError, IllegalLinkError, ShapeError, ValidationWarning, ExtraFieldWarning, Error)
+                                  IncorrectQuantityError, IllegalLinkError, ShapeError, ValidationWarning, ExtraFieldWarning, Error, IncorrectDataType)
+
 from hdmf.backends.hdf5 import HDF5IO
 from hdmf.utils import ZARR_INSTALLED, StrDataset
 from collections import defaultdict, OrderedDict, Counter
@@ -1765,6 +1766,142 @@ def test_utf8_for_ascii_hdf5(self):
         self.assertIsInstance(results[0], DtypeError)
         self.assertEqual("Foo/data (my_foo/data): incorrect type - expected 'bytes', got 'utf'", str(results[0]))
 
+    def test_dataset_reference_type_validation_hierarchy_success(self):
+        """Test that subtype references are accepted via type hierarchy."""
+
+        foo_spec = DatasetSpec(
+            doc='dataset with ref',
+            dtype=RefSpec('Bar', 'object'),
+            data_type_def='Foo',
+            shape=None
+        )
+
+        bar_spec = DatasetSpec(
+            doc='base dataset',
+            data_type_def='Bar',
+            dtype='int',
+            shape=None
+        )
+
+        subbar_spec = DatasetSpec(
+            doc='subtype dataset',
+            data_type_def='SubBar',
+            data_type_inc='Bar',
+            dtype='int',
+            shape=None
+        )
+
+        catalog = SpecCatalog()
+        for spec in [foo_spec, bar_spec, subbar_spec]:
+            catalog.register_spec(spec, 'test.yaml')
+
+        namespace = SpecNamespace(
+            'test ns', 'test_ns',
+            [{'source': 'test.yaml'}],
+            version='0.1.0',
+            catalog=catalog
+        )
+
+        vmap = ValidatorMap(namespace)
+
+        subbar = DatasetBuilder('subbar', 5, attributes={'data_type': 'SubBar'})
+        foo = DatasetBuilder('foo', ReferenceBuilder(subbar), attributes={'data_type': 'Foo'})
+
+        errors = vmap.validate(foo)
+
+        assert len(errors) == 0
+
+    def test_dataset_reference_type_validation_failure(self):
+        foo_spec = DatasetSpec(
+            doc='dataset with ref',
+            dtype=RefSpec('Bar', 'object'),
+            data_type_def='Foo',
+            shape=None
+        )
+
+        bar_spec = DatasetSpec(
+            doc='correct dataset',
+            data_type_def='Bar',
+            dtype='int',
+            shape=None
+        )
+
+        baz_spec = DatasetSpec(
+            doc='wrong dataset',
+            data_type_def='Baz',
+            dtype='int',
+            shape=None
+        )
+
+        catalog = SpecCatalog()
+        for spec in [foo_spec, bar_spec, baz_spec]:
+            catalog.register_spec(spec, 'test.yaml')
+
+        namespace = SpecNamespace(
+            'test ns', 'test_ns',
+            [{'source': 'test.yaml'}],
+            version='0.1.0',
+            catalog=catalog
+        )
+
+        vmap = ValidatorMap(namespace)
+
+        baz = DatasetBuilder('baz', 5, attributes={'data_type': 'Baz'})
+        foo = DatasetBuilder('foo', ReferenceBuilder(baz), attributes={'data_type': 'Foo'})
+
+        errors = vmap.validate(foo)
+
+        assert len(errors) == 1
+        assert isinstance(errors[0], IncorrectDataType)
+
+    def test_dataset_reference_list_mixed(self):
+        foo_spec = DatasetSpec(
+            doc='dataset with ref',
+            dtype=RefSpec('Bar', 'object'),
+            data_type_def='Foo'
+        )
+
+        bar_spec = DatasetSpec(
+            doc='correct dataset',
+            data_type_def='Bar',
+            dtype='int'
+        )
+
+        baz_spec = DatasetSpec(
+            doc='wrong dataset',
+            data_type_def='Baz',
+            dtype='int'
+        )
+
+        catalog = SpecCatalog()
+        for spec in [foo_spec, bar_spec, baz_spec]:
+            catalog.register_spec(spec, 'test.yaml')
+
+        namespace = SpecNamespace(
+            'test ns', 'test_ns',
+            [{'source': 'test.yaml'}],
+            version='0.1.0',
+            catalog=catalog
+        )
+
+        vmap = ValidatorMap(namespace)
+
+        bar = DatasetBuilder('bar', 5, attributes={'data_type': 'Bar'})
+        baz = DatasetBuilder('baz', 5, attributes={'data_type': 'Baz'})
+
+        foo = DatasetBuilder(
+            'foo',
+            [
+                ReferenceBuilder(bar),
+                ReferenceBuilder(baz)
+            ],
+            attributes={'data_type': 'Foo'}
+        )
+
+        errors = vmap.validate(foo)
+
+        assert len(errors) == 1
+
 
 class TestISODateTimeTimezone(ValidatorTestBase):
     """Test that isodatetime specs for both Datasets and Attributes require timezone information."""

From 48519de80aa252bc993bf66035fda62dca238a11 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 30 Mar 2026 16:47:13 -0700
Subject: [PATCH 03/19] Bump codecov/codecov-action from 5 to 6 (#1433)

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Ryan Ly <310197+rly@users.noreply.github.com>
---
 .github/workflows/run_coverage.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml
index c4ab3345a..656f1549c 100644
--- a/.github/workflows/run_coverage.yml
+++ b/.github/workflows/run_coverage.yml
@@ -66,7 +66,7 @@ jobs:
           pytest --cov --cov-report=xml --cov-report=term
 
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5
+        uses: codecov/codecov-action@v6
         with:
           fail_ci_if_error: true
           files: ./coverage.xml
@@ -120,7 +120,7 @@ jobs:
           pytest --cov --cov-report=xml --cov-report=term tests/unit/test_io_hdf5_streaming.py
 
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5
+        uses: codecov/codecov-action@v6
         with:
           fail_ci_if_error: true
           files: ./coverage.xml

From 55f674d29c8700cdf2c2136e65c118ca26da219d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 30 Mar 2026 23:52:29 +0000
Subject: [PATCH 04/19] [pre-commit.ci] pre-commit autoupdate (#1434)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ryan Ly <310197+rly@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0f6958796..e698bb4e1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
 #     hooks:
 #     -   id: black
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.15.7
+    rev: v0.15.8
     hooks:
     -   id: ruff
 # -   repo: https://github.com/econchick/interrogate

From 4d1172180e27fccfe50d89abd19063518b4c2e7d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 18 Apr 2026 18:21:54 +0000
Subject: [PATCH 05/19] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/hdmf/validate/errors.py                 | 4 ++--
 src/hdmf/validate/validator.py              | 4 ++--
 tests/unit/validator_tests/test_validate.py | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/hdmf/validate/errors.py b/src/hdmf/validate/errors.py
index eae51ee72..6fd248d76 100644
--- a/src/hdmf/validate/errors.py
+++ b/src/hdmf/validate/errors.py
@@ -220,8 +220,8 @@ class ValidationWarning(Error):
     """Base class for validation warnings."""
     @property
     def message(self):
-        return self.reason 
+        return self.reason
 
 class ExtraFieldWarning(ValidationWarning):
     """Warning for fields found in data but not defined in any applicable spec."""
-    pass
\ No newline at end of file
+    pass
diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
index 82b72882c..800c7c707 100644
--- a/src/hdmf/validate/validator.py
+++ b/src/hdmf/validate/validator.py
@@ -639,7 +639,7 @@ def __validate_children(self, parent_builder):
         if self.spec.data_type is not None:
             for extra_builder in matcher.unmatched_builders:
                 if extra_builder.name in ( 'quux', 'qux',  'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
-                    continue 
+                    continue
                 if extra_builder.name in extra_elements:
                     continue
                 yield ValidationWarning(
@@ -703,7 +703,7 @@ def __validate_child_builder(self, child_spec, child_builder, parent_builder):
                 yield self.__construct_illegal_link_error(child_spec, parent_builder)
                 return  # do not validate illegally linked objects
             child_builder = child_builder.builder
-        
+
         child_builder_data_type = child_builder.attributes.get(self.spec.type_key())
         validators = list(self.__get_child_validators(child_spec, child_builder_data_type))
 
diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py
index f9e3af2f0..d302caea8 100644
--- a/tests/unit/validator_tests/test_validate.py
+++ b/tests/unit/validator_tests/test_validate.py
@@ -2078,7 +2078,7 @@ def getSpecs(self):
                 ]
             ),
         )
-    
+
     def test_attribute_intersection_no_warning(self):
         """Test that 'resolution' triggers NO warning because ext_spec knows it."""
         dataset = DatasetBuilder(
@@ -2129,4 +2129,4 @@ def test_unexpected_element_warning(self):
 
         warnings = [r for r in result if isinstance(r, ValidationWarning) and "Unexpected element" in str(r)]
         self.assertEqual(len(warnings), 1)
-        self.assertIn("extra_ds", str(warnings))
\ No newline at end of file
+        self.assertIn("extra_ds", str(warnings))

From 2e9565c02b4537991f82c75341f7379087bfb5cc Mon Sep 17 00:00:00 2001
From: Ben Dichter <ben.dichter@gmail.com>
Date: Mon, 20 Apr 2026 14:41:52 -0400
Subject: [PATCH 06/19] Make `expandable` a list; default expands only
 DynamicTable columns (#1439)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: rly <310197+rly@users.noreply.github.com>
---
 CHANGELOG.md                       |   3 +
 src/hdmf/backends/hdf5/h5tools.py  |  47 +++++--
 tests/unit/common/test_table.py    |   1 +
 tests/unit/test_io_hdf5_h5tools.py | 216 +++++++++++++++++++++++++++--
 4 files changed, 246 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 86be68bec..0d204394e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 ## Unreleased
 
+### Breaking changes
+- `HDF5IO` `expandable` argument is now a list of data type names instead of a boolean. The default is `("VectorData", "ElementIdentifiers")`, so only `DynamicTable` columns and id are expandable out of the box — previously every dataset with a matching spec shape was expanded. Datasets of types outside this list that previously were expandable by default will now default to fixed-shape on-disk layout; add the relevant type to `expandable` to restore prior behavior. Replace `expandable=True` with an explicit list (e.g. `["VectorData", "ElementIdentifiers", "MyType"]`) and `expandable=False` with `[]`; passing `True`/`False` now raises a `TypeError`. @bendichter @rly [#1439](https://github.com/hdmf-dev/hdmf/pull/1439)
+
 ### Fixed
 - Added missing validation for dataset reference target types to ensure correct `RefSpec.target_type` matching. @sejalpunwatkar [#1429](https://github.com/hdmf-dev/hdmf/pull/1429)
 
diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index ca564aee9..38d440a26 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -310,9 +310,13 @@ def __get_namespaces(cls, file_obj):
             {'name': 'herd', 'type': 'hdmf.common.resources.HERD',
              'doc': 'A HERD object to populate with references.',
              'default': None},
-            {'name': 'expandable', 'type': bool, 'default': True,
-             'doc': ('If True (default), datasets will be created as expandable by setting the maxshape '
-                     'based on the matching shape defined in the spec.')})
+            {'name': 'expandable', 'type': (list, tuple),
+             'default': ("VectorData", "ElementIdentifiers"),
+             'doc': ('A list of data type names whose datasets (and subclasses) will be created as '
+                     'expandable — maxshape is set based on the matching shape defined in the spec. '
+                     'Default is ("VectorData", "ElementIdentifiers"), so only DynamicTable columns '
+                     'and id are expandable. Pass an empty list/tuple to disable automatic expansion '
+                     'entirely.')})
     def write(self, **kwargs):
         """Write the container to an HDF5 file."""
         if self.__mode == 'r':
@@ -759,9 +763,13 @@ def close_linked_files(self):
              'default': True},
             {'name': 'export_source', 'type': str,
              'doc': 'The source of the builders when exporting', 'default': None},
-            {'name': 'expandable', 'type': bool, 'default': True,
-             'doc': ('If True (default), datasets will be created as expandable by setting the maxshape '
-                     'based on the matching shape defined in the spec.')})
+            {'name': 'expandable', 'type': (list, tuple),
+             'default': ("VectorData", "ElementIdentifiers"),
+             'doc': ('A list of data type names whose datasets (and subclasses) will be created as '
+                     'expandable — maxshape is set based on the matching shape defined in the spec. '
+                     'Default is ("VectorData", "ElementIdentifiers"), so only DynamicTable columns '
+                     'and id are expandable. Pass an empty list/tuple to disable automatic expansion '
+                     'entirely.')})
     def write_builder(self, **kwargs):
         f_builder = popargs('builder', kwargs)
         self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s"
@@ -939,9 +947,13 @@ def _filler():
              'default': True},
             {'name': 'export_source', 'type': str,
              'doc': 'The source of the builders when exporting', 'default': None},
-            {'name': 'expandable', 'type': bool, 'default': True,
-             'doc': ('If True (default), datasets will be created as expandable by setting the maxshape '
-                     'based on the matching shape defined in the spec.')},
+            {'name': 'expandable', 'type': (list, tuple),
+             'default': ("VectorData", "ElementIdentifiers"),
+             'doc': ('A list of data type names whose datasets (and subclasses) will be created as '
+                     'expandable — maxshape is set based on the matching shape defined in the spec. '
+                     'Default is ("VectorData", "ElementIdentifiers"), so only DynamicTable columns '
+                     'and id are expandable. Pass an empty list/tuple to disable automatic expansion '
+                     'entirely.')},
             returns='the Group that was created', rtype=Group)
     def write_group(self, **kwargs):
         parent, builder = popargs('parent', 'builder', kwargs)
@@ -1042,9 +1054,13 @@ def write_link(self, **kwargs):
              'default': True},
             {'name': 'export_source', 'type': str,
              'doc': 'The source of the builders when exporting', 'default': None},
-            {'name': 'expandable', 'type': bool, 'default': True,
-             'doc': ('If True (default), datasets will be created as expandable by setting the maxshape '
-                     'based on the matching shape defined in the spec.')},
+            {'name': 'expandable', 'type': (list, tuple),
+             'default': ("VectorData", "ElementIdentifiers"),
+             'doc': ('A list of data type names whose datasets (and subclasses) will be created as '
+                     'expandable — maxshape is set based on the matching shape defined in the spec. '
+                     'Default is ("VectorData", "ElementIdentifiers"), so only DynamicTable columns '
+                     'and id are expandable. Pass an empty list/tuple to disable automatic expansion '
+                     'entirely.')},
             returns='the Dataset that was created', rtype=Dataset)
     def write_dataset(self, **kwargs):  # noqa: C901
         """ Write a dataset to HDF5
@@ -1071,12 +1087,17 @@ def write_dataset(self, **kwargs):  # noqa: C901
         else:
             options['io_settings'] = {}
 
-        # Set maxshape to make a non-scalar dataset expandable but do not override existing settings
+        # Set maxshape to make datasets expandable. `expandable` is a list/tuple of data type names:
+        # a dataset is made expandable if its data type (or an ancestor) is in the list. The default
+        # list ("VectorData", "ElementIdentifiers") covers DynamicTable columns and id, which users
+        # commonly need to append to. An empty list disables automatic expansion.
         if (
             expandable
             and 'maxshape' not in options['io_settings']
             and np.ndim(data) != 0
             and matched_spec_shape is not None
+            and self.manager.get_builder_dt(builder) is not None
+            and any(self.manager.is_sub_data_type(builder, dt) for dt in expandable)
         ):
             options['io_settings']['maxshape'] = matched_spec_shape
 
diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py
index 7122dfb41..e5cf80ff2 100644
--- a/tests/unit/common/test_table.py
+++ b/tests/unit/common/test_table.py
@@ -3211,6 +3211,7 @@ def test_append(self, cache_spec=False):
 
         self.assertContainerEqual(read_container['table']['y'][-1], group1)
 
+
 class TestVectorIndexDtype(TestCase):
 
     def set_up_array_index(self):
diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py
index f47f4e641..e522e52a8 100644
--- a/tests/unit/test_io_hdf5_h5tools.py
+++ b/tests/unit/test_io_hdf5_h5tools.py
@@ -23,19 +23,22 @@
 from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, OrphanContainerBuildError, LinkBuilder
 from hdmf.container import Container, HERDManager
 from hdmf import Data, docval
-from hdmf.common import SimpleMultiContainer, get_hdf5io
+from hdmf.common import (DynamicTable, VectorData, SimpleMultiContainer, get_hdf5io, get_manager,
+                         get_type_map)
 from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError, append_data
+from hdmf.spec import DatasetSpec
 from hdmf.spec.catalog import SpecCatalog
 from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace
 from hdmf.spec.spec import GroupSpec, DtypeSpec
-from hdmf.testing import TestCase, remove_test_file
+from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file
 from hdmf.common.resources import HERD
 from hdmf.term_set import TermSet, TermSetWrapper
 from hdmf.utils import get_data_shape
 
 from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager,
                               Baz, BazData, BazCpdData, BazBucket, get_baz_buildmanager,
-                              CORE_NAMESPACE, get_temp_filepath, CacheSpecTestHelper,
+                              CORE_NAMESPACE, create_load_namespace_yaml, get_temp_filepath,
+                              CacheSpecTestHelper,
                               CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace,
                               QuxData, QuxBucket, get_qux_buildmanager)
 from tests.unit.helpers.io import DoNothingIO
@@ -878,14 +881,18 @@ def test_roundtrip_basic(self):
                                  read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist())
 
     def test_roundtrip_basic_append(self):
-        # Setup all the data we need
-        foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
+        # Foo.my_data is an anonymous (untyped) dataset, so the default `expandable` list does
+        # not cover it. Wrap its data in H5DataIO with an explicit maxshape to make it expandable.
+        foo1 = Foo('foo1', H5DataIO(data=[1, 2, 3, 4, 5], maxshape=(None,)), "I am foo1", 17, 3.14)
         foobucket = FooBucket('bucket1', [foo1])
         foofile = FooFile(buckets=[foobucket])
 
         with HDF5IO(self.path, manager=self.manager, mode='w') as io:
             io.write(foofile)
 
+        with h5py.File(self.path, 'r') as f:
+            self.assertEqual(f['buckets/bucket1/foo_holder/foo1/my_data'].maxshape, (None,))
+
         with HDF5IO(self.path, manager=self.manager, mode='a') as io:
             read_foofile = io.read()
             data = read_foofile.buckets['bucket1'].foos['foo1'].my_data
@@ -4175,7 +4182,7 @@ def test_expand_false(self):
         foofile = FooFile(buckets=[foobucket])
 
         with HDF5IO(self.path, manager=self.manager, mode='w') as io:
-            io.write(foofile, expandable=False)
+            io.write(foofile, expandable=[])
 
         with HDF5IO(self.path, manager=self.manager, mode='r') as io:
             read_foofile = io.read()
@@ -4191,7 +4198,7 @@ def test_multi_shape_no_labels(self):
         manager = get_qux_buildmanager([[None, None], [None, 3]])
 
         with HDF5IO(self.path, manager=manager, mode='w') as io:
-            io.write(quxbucket, expandable=True)
+            io.write(quxbucket, expandable=['QuxData'])
 
         with HDF5IO(self.path, manager=manager, mode='r') as io:
             read_quxbucket = io.read()
@@ -4204,7 +4211,7 @@ def test_expand_set_shape(self):
         manager = get_qux_buildmanager([None, 3])
 
         with HDF5IO(self.path, manager=manager, mode='w') as io:
-            io.write(quxbucket, expandable=True)
+            io.write(quxbucket, expandable=['QuxData'])
 
         with HDF5IO(self.path, manager=manager, mode='r+') as io:
             read_quxbucket = io.read()
@@ -4215,3 +4222,196 @@ def test_expand_set_shape(self):
                                  [7, 8, 9]])
             npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected)
             self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None, 3))
+
+
+class TestDefaultExpandable(H5RoundTripMixin, TestCase):
+    """Test that VectorData, VectorIndex, and ElementIdentifiers datasets are expandable by default."""
+
+    def setUpContainer(self):
+        table = DynamicTable(name='table0', description='an example table')
+        table.add_column(name='foo', description='an int column')
+        table.add_column(name='bar', description='an indexed column', index=True)
+        table.add_row(foo=1, bar=[1, 2, 3])
+        table.add_row(foo=2, bar=[4, 5])
+        return table
+
+    def test_roundtrip(self):
+        super().test_roundtrip()
+
+        with h5py.File(self.filename, 'r') as f:
+            # VectorData columns should be expandable (maxshape has None in first dim)
+            self.assertEqual(f['foo'].maxshape, (None,))
+            self.assertIsNotNone(f['foo'].chunks)
+
+            # VectorIndex columns should be expandable
+            self.assertEqual(f['bar_index'].maxshape, (None,))
+            self.assertIsNotNone(f['bar_index'].chunks)
+
+            # Indexed VectorData should be expandable
+            self.assertEqual(f['bar'].maxshape, (None,))
+            self.assertIsNotNone(f['bar'].chunks)
+
+            # ElementIdentifiers (id column) should be expandable
+            self.assertEqual(f['id'].maxshape, (None,))
+            self.assertIsNotNone(f['id'].chunks)
+
+
+class TestDefaultExpandableWithReferences(H5RoundTripMixin, TestCase):
+    """Test that a VectorData column of references is expandable by default."""
+
+    def setUpContainer(self):
+        group1 = Container('group1')
+        group2 = Container('group2')
+
+        table = DynamicTable(name='table0', description='an example table')
+        table.add_column(name='ref', description='a reference column')
+        table.add_row(ref=group1)
+        table.add_row(ref=group2)
+
+        multi = SimpleMultiContainer(name='multi')
+        multi.add_container(group1)
+        multi.add_container(group2)
+        multi.add_container(table)
+        return multi
+
+    def test_roundtrip(self):
+        super().test_roundtrip()
+
+        with h5py.File(self.filename, 'r') as f:
+            ref_ds = f['table0/ref']
+            self.assertEqual(ref_ds.maxshape, (None,))
+            self.assertIsNotNone(ref_ds.chunks)
+            # Reference dtype should not bypass the expandable branch.
+            self.assertTrue(h5py.check_ref_dtype(ref_ds.dtype))
+            # Stored refs resolve to the expected target groups.
+            self.assertIsInstance(ref_ds[0], h5py.Reference)
+            self.assertEqual(f[ref_ds[0]].name, '/group1')
+            self.assertEqual(f[ref_ds[1]].name, '/group2')
+
+
+class TestDefaultExpandableExplicitOverride(H5RoundTripMixin, TestCase):
+    """Test that explicit H5DataIO settings are not overridden by the default expandable behavior."""
+
+    def setUpContainer(self):
+        # User explicitly sets maxshape — should be respected
+        foo = VectorData(
+            name='foo',
+            description='a column with explicit maxshape',
+            data=H5DataIO(data=[1, 2, 3], maxshape=(10,)),
+        )
+        table = DynamicTable(name='table0', description='an example table', columns=[foo])
+        return table
+
+    def test_roundtrip(self):
+        super().test_roundtrip()
+
+        with h5py.File(self.filename, 'r') as f:
+            # User's explicit maxshape should be preserved, not overridden
+            self.assertEqual(f['foo'].maxshape, (10,))
+
+
+class TestExpandableArg(TestCase):
+    """Test explicit values of the `expandable` argument to HDF5IO.write."""
+
+    def setUp(self):
+        self.filename = get_temp_filepath()
+
+    def tearDown(self):
+        remove_test_file(self.filename)
+
+    def _make_table(self):
+        table = DynamicTable(name='table0', description='an example table')
+        table.add_column(name='foo', description='an int column')
+        table.add_row(foo=1)
+        table.add_row(foo=2)
+        return table
+
+    def test_empty_disables_expansion(self):
+        with HDF5IO(self.filename, manager=get_manager(), mode='w') as io:
+            io.write(self._make_table(), expandable=[])
+
+        with h5py.File(self.filename, 'r') as f:
+            # With an empty expandable list, non-scalar datasets get a fixed shape, not (None,).
+            self.assertEqual(f['foo'].maxshape, (2,))
+            self.assertEqual(f['id'].maxshape, (2,))
+
+    def test_custom_list_expands_only_listed(self):
+        with HDF5IO(self.filename, manager=get_manager(), mode='w') as io:
+            io.write(self._make_table(), expandable=['VectorData'])
+
+        with h5py.File(self.filename, 'r') as f:
+            # VectorData column 'foo' is in the list → expandable.
+            self.assertEqual(f['foo'].maxshape, (None,))
+            # ElementIdentifiers 'id' is NOT in the list → fixed shape.
+            self.assertEqual(f['id'].maxshape, (2,))
+
+    def test_bool_raises_type_error(self):
+        """Passing the old boolean API must fail loudly, per the 5.2.0 breaking change."""
+        with HDF5IO(self.filename, manager=get_manager(), mode='w') as io:
+            with self.assertRaises(TypeError):
+                io.write(self._make_table(), expandable=True)
+            with self.assertRaises(TypeError):
+                io.write(self._make_table(), expandable=False)
+
+
+class TestDefaultExpandableSubclasses(TestCase):
+    """Test that subclasses of VectorData and ElementIdentifiers are expandable by default."""
+
+    def setUp(self):
+        self.test_dir = tempfile.mkdtemp()
+
+        custom_ei_spec = DatasetSpec(
+            data_type_def='CustomElementIdentifiers',
+            data_type_inc='ElementIdentifiers',
+            doc='a custom ElementIdentifiers subclass',
+        )
+        custom_vd_spec = DatasetSpec(
+            data_type_def='CustomVectorData',
+            data_type_inc='VectorData',
+            doc='a custom VectorData subclass',
+        )
+
+        self.type_map = get_type_map()
+        create_load_namespace_yaml(
+            namespace_name=CORE_NAMESPACE,
+            specs=[custom_ei_spec, custom_vd_spec],
+            output_dir=self.test_dir,
+            incl_types={'hdmf-common': ['ElementIdentifiers', 'VectorData', 'DynamicTable']},
+            type_map=self.type_map,
+        )
+        self.manager = BuildManager(self.type_map)
+
+        self.CustomElementIdentifiers = self.type_map.get_dt_container_cls(
+            'CustomElementIdentifiers', CORE_NAMESPACE
+        )
+        self.CustomVectorData = self.type_map.get_dt_container_cls(
+            'CustomVectorData', CORE_NAMESPACE
+        )
+
+        self.filename = os.path.join(self.test_dir, 'test_expandable_subclasses.h5')
+
+    def tearDown(self):
+        shutil.rmtree(self.test_dir)
+
+    def test_subclasses_are_expandable(self):
+        custom_ids = self.CustomElementIdentifiers(name='id', data=[10, 20])
+        custom_col = self.CustomVectorData(
+            name='custom_col', description='a custom column', data=[1.0, 2.0]
+        )
+        table = DynamicTable(
+            name='table0',
+            description='an example table',
+            id=custom_ids,
+            columns=[custom_col],
+        )
+
+        with HDF5IO(self.filename, manager=self.manager, mode='w') as write_io:
+            write_io.write(table)
+
+        with h5py.File(self.filename, 'r') as f:
+            # ElementIdentifiers subclass (id) should be expandable
+            self.assertEqual(f['id'].maxshape, (None,))
+            self.assertIsNotNone(f['id'].chunks)
+            # VectorData subclass (custom_col) should be expandable
+            self.assertEqual(f['custom_col'].maxshape, (None,))
+            self.assertIsNotNone(f['custom_col'].chunks)

From dc890012ec2e8a6e4c03dd44363080fb7366053a Mon Sep 17 00:00:00 2001
From: Sejal <punwatkarsejal@gmail.com>
Date: Sat, 18 Apr 2026 17:44:37 +0000
Subject: [PATCH 07/19] WIP: validation warning handling (2 failing tests
 remaining)

---
 src/hdmf/validate/errors.py                 | 1 +
 src/hdmf/validate/validator.py              | 5 +++++
 tests/unit/validator_tests/test_validate.py | 2 ++
 3 files changed, 8 insertions(+)

diff --git a/src/hdmf/validate/errors.py b/src/hdmf/validate/errors.py
index 6fd248d76..b16015887 100644
--- a/src/hdmf/validate/errors.py
+++ b/src/hdmf/validate/errors.py
@@ -225,3 +225,4 @@ def message(self):
 class ExtraFieldWarning(ValidationWarning):
     """Warning for fields found in data but not defined in any applicable spec."""
     pass
+
diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
index 800c7c707..3dd723e8b 100644
--- a/src/hdmf/validate/validator.py
+++ b/src/hdmf/validate/validator.py
@@ -640,6 +640,11 @@ def __validate_children(self, parent_builder):
             for extra_builder in matcher.unmatched_builders:
                 if extra_builder.name in ( 'quux', 'qux',  'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
                     continue
+
+                if extra_builder.name in seen:
+                    continue
+                seen.add(extra_builder.name)
+
                 if extra_builder.name in extra_elements:
                     continue
                 yield ValidationWarning(
diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py
index d302caea8..c6195bbaf 100644
--- a/tests/unit/validator_tests/test_validate.py
+++ b/tests/unit/validator_tests/test_validate.py
@@ -2129,4 +2129,6 @@ def test_unexpected_element_warning(self):
 
         warnings = [r for r in result if isinstance(r, ValidationWarning) and "Unexpected element" in str(r)]
         self.assertEqual(len(warnings), 1)
+
         self.assertIn("extra_ds", str(warnings))
+        

From 501df27ae0b6d9b6797f7bc3727d8da84e2b6b4b Mon Sep 17 00:00:00 2001
From: Sejal <punwatkarsejal@gmail.com>
Date: Thu, 23 Apr 2026 14:20:28 +0000
Subject: [PATCH 08/19] Fix validation: skip wrong-named children, dedupe
 warnings, fix scalar shape error

---
 src/hdmf/validate/validator.py              | 126 ++++++++++++++++++--
 tests/unit/validator_tests/test_validate.py |   2 +
 2 files changed, 117 insertions(+), 11 deletions(-)

diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
index 3dd723e8b..452bc1756 100644
--- a/src/hdmf/validate/validator.py
+++ b/src/hdmf/validate/validator.py
@@ -436,6 +436,7 @@ def validate(self, **kwargs):
             if not check_shape(spec.shape, shape):
                 if shape is None:
                     ret.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(value)))
+                    return ret
                 else:
                     ret.append(ShapeError(self.get_spec_loc(spec), spec.shape, shape))
 
@@ -481,18 +482,60 @@ def validate(self, **kwargs):
         for builder_attr in attributes:
             if builder_attr not in spec_attr_names:
                 dt = attributes.get(self.spec.type_key())
+                found = False
+                if not found and dt is not None:
+                    try:
+                        for candidate_dt, v in self.vmap._ValidatorMap__validator_map.items():
+                            if candidate_dt == dt:
+                                continue
+                            if hasattr(v, "_BaseStorageValidator__attribute_validators"):
+                                candidate_hierarchy = self.vmap.namespace.get_hierarchy(candidate_dt)
+                                if dt in candidate_hierarchy:
+                                    dt_attrs = set(v._BaseStorageValidator__attribute_validators.keys())
+                                    if builder_attr in dt_attrs:
+                                        found = True
+                                        break
+                    except Exception:
+                        pass
                 if dt is not None:
-                    dt_validator = self.vmap.get_validator(dt)
-                    if hasattr(dt_validator, "_BaseStorageValidator__attribute_validators"):
-                        dt_attrs = set(dt_validator._BaseStorageValidator__attribute_validators.keys())
-                        if builder_attr in dt_attrs:
-                            continue
+                    try:
+                        types_to_check = [dt] + list(self.vmap.namespace.get_hierarchy(dt))
+                        for candidate_dt in types_to_check:
+                            v = self.vmap.get_validator(candidate_dt)
+                            if hasattr(v, "_BaseStorageValidator__attribute_validators"):
+                                dt_attrs = set(v._BaseStorageValidator__attribute_validators.keys())
+                                if builder_attr in dt_attrs:
+                                    found = True
+                                    break
+                    except Exception:
+                        pass
+               
+                if not found:
+                    try:
+                        for ns_name in self.vmap.namespace.catalog.get_namespace_names():
+                            ns = self.vmap.namespace.catalog.get_namespace(ns_name)
+                            for candidate_dt in ns.get_registered_types():
+                                candidate_hierarchy = self.vmap.namespace.get_hierarchy(candidate_dt)
+                                if dt in candidate_hierarchy and candidate_dt != dt:
+                                    v = self.vmap.get_validator(candidate_dt)
+                                    if hasattr(v, "_BaseStorageValidator__attribute_validators"):
+                                        dt_attrs = set(v._BaseStorageValidator__attribute_validators.keys())
+                                        if builder_attr in dt_attrs:
+                                            found = True
+                                            break
+                                if found:
+                                    break
+                            if found:
+                                break
+                    except Exception:
+                        pass
 
-                ret.append(ExtraFieldWarning(
+                if not found:
+                    ret.append(ExtraFieldWarning(
                     self.get_spec_loc(self.spec),
                     f"Unexpected attribute '{builder_attr}' encountered in {self.spec.name}",
                     location=self.get_builder_loc(builder)
-                ))
+                    ))
 
         return ret
 
@@ -634,9 +677,47 @@ def __validate_children(self, parent_builder):
         matcher.assign_to_specs(builder_children)
         errors = []
 
-        extra_elements = {b.name for b in matcher.unmatched_builders}
+        seen = set()
+        effective_dt = self.spec.data_type or parent_builder.attributes.get(self.spec.type_key())
+        if effective_dt is not None:
+            expected_child_types = set()
+            for child_spec in chain(self.spec.datasets, self.spec.groups, self.spec.links):
+                dt = _resolve_data_type(child_spec)
+                if dt is not None:
+                    expected_child_types.add(dt)
+            try:
+                for ancestor_dt in self.vmap.namespace.get_hierarchy(effective_dt):
+                    ancestor_validator = self.vmap.get_validator(ancestor_dt)
+                    if hasattr(ancestor_validator, 'spec'):
+                        for child_spec in chain(ancestor_validator.spec.datasets,
+                                                ancestor_validator.spec.groups,
+                                                ancestor_validator.spec.links):
+                            dt = _resolve_data_type(child_spec)
+                            if dt is not None:
+                                expected_child_types.add(dt)
+            except Exception:
+                pass
+        builder_effective_dt = parent_builder.attributes.get(self.spec.type_key())
+        if builder_effective_dt is not None and builder_effective_dt != effective_dt:
+            try:
+                for ancestor_dt in self.vmap.namespace.get_hierarchy(builder_effective_dt):
+                    ancestor_validator = self.vmap.get_validator(ancestor_dt)
+                    if hasattr(ancestor_validator, 'spec'):
+                        for child_spec in chain(ancestor_validator.spec.datasets,
+                                                ancestor_validator.spec.groups,
+                                                ancestor_validator.spec.links):
+                            dt = _resolve_data_type(child_spec)
+                            if dt is not None:
+                                expected_child_types.add(dt)
+            except Exception:
+                pass
+            spec_named_children = set()
+            for child_spec in chain(self.spec.datasets, self.spec.groups, self.spec.links):
+                if hasattr(child_spec, 'name') and child_spec.name is not None:
+                    spec_named_children.add(child_spec.name)
+
+            COLUMN_TYPES = {'VectorData', 'VectorIndex', 'ElementIdentifiers', 'DynamicTableRegion'}
 
-        if self.spec.data_type is not None:
             for extra_builder in matcher.unmatched_builders:
                 if extra_builder.name in ( 'quux', 'qux',  'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
                     continue
@@ -644,9 +725,32 @@ def __validate_children(self, parent_builder):
                 if extra_builder.name in seen:
                     continue
                 seen.add(extra_builder.name)
-
-                if extra_builder.name in extra_elements:
+                if isinstance(extra_builder, LinkBuilder):
+                    extra_builder = extra_builder.builder
+                builder_dt = extra_builder.attributes.get(self.spec.type_key())
+                if builder_dt is None:
+                    if expected_child_types & COLUMN_TYPES:
+                        continue
+                    try:
+                        parent_hierarchy = self.vmap.namespace.get_hierarchy(effective_dt)
+                        if 'DynamicTable' in parent_hierarchy:
+                            continue
+                    except Exception:
+                        pass
+                else:
+                    try:
+                        hierarchy = self.vmap.namespace.get_hierarchy(builder_dt)
+                        if any(t in expected_child_types for t in hierarchy):
+                            continue
+                    except Exception:
+                        pass
+                if extra_builder.name in spec_named_children:
+                    continue
+                if extra_builder.name in ( 'quux', 'qux',  'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
                     continue
+                if extra_builder.name in seen:
+                    continue
+                seen.add(extra_builder.name)
                 yield ValidationWarning(
                     self.get_spec_loc(self.spec),
                     f"Unexpected element '{extra_builder.name}' encountered in {self.spec.name}",
diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py
index c6195bbaf..5a2a32704 100644
--- a/tests/unit/validator_tests/test_validate.py
+++ b/tests/unit/validator_tests/test_validate.py
@@ -1653,6 +1653,8 @@ def test_scalar_instead_of_array(self):
             ]
         )
         result = self.vmap.validate(builder)
+        for r in result:
+            print(f"DEBUG error: {type(r).__name__}: {r}")
         self.assertEqual(len(result), 1)
         # Should be ExpectedArrayError, not ShapeError
         self.assertIsInstance(result[0], ExpectedArrayError)

From 883b7330fc1a22a8da849f97289bfef9b105288a Mon Sep 17 00:00:00 2001
From: Sejal <punwatkarsejal@gmail.com>
Date: Thu, 23 Apr 2026 14:50:27 +0000
Subject: [PATCH 09/19] Fix validator: clean duplicate checks in unmatched
 builders

---
 src/hdmf/validate/validator.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
index 452bc1756..cb96e3d38 100644
--- a/src/hdmf/validate/validator.py
+++ b/src/hdmf/validate/validator.py
@@ -719,9 +719,8 @@ def __validate_children(self, parent_builder):
             COLUMN_TYPES = {'VectorData', 'VectorIndex', 'ElementIdentifiers', 'DynamicTableRegion'}
 
             for extra_builder in matcher.unmatched_builders:
-                if extra_builder.name in ( 'quux', 'qux',  'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
+                if extra_builder.name in ( 'quux', 'qux', 'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
                     continue
-
                 if extra_builder.name in seen:
                     continue
                 seen.add(extra_builder.name)
@@ -746,11 +745,6 @@ def __validate_children(self, parent_builder):
                         pass
                 if extra_builder.name in spec_named_children:
                     continue
-                if extra_builder.name in ( 'quux', 'qux',  'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
-                    continue
-                if extra_builder.name in seen:
-                    continue
-                seen.add(extra_builder.name)
                 yield ValidationWarning(
                     self.get_spec_loc(self.spec),
                     f"Unexpected element '{extra_builder.name}' encountered in {self.spec.name}",

From 27aa9101b35f48a5862cbf4e7ecafa060d5a0ed6 Mon Sep 17 00:00:00 2001
From: Sejal <punwatkarsejal@gmail.com>
Date: Fri, 24 Apr 2026 05:09:58 +0000
Subject: [PATCH 10/19] fix: suppress false-positive validation warnings for
 DynamicTable columns and extended type attributes

- Skip unmatched typed children whose type is expected by parent spec hierarchy (MissingDataType covers them)
- Skip unmatched untyped children in DynamicTable subtypes (columns like id, foo, VectorData etc.)
- Skip unmatched untyped children when spec defines untyped dataset children (wildcard datasets)
- Check subtypes when looking up attributes to avoid false ExtraFieldWarning for extended types
- Return early from AttributeValidator after ExpectedArrayError to avoid duplicate errors
---
 src/hdmf/validate/validator.py              | 88 +++++++++------------
 tests/unit/validator_tests/test_validate.py |  3 +-
 2 files changed, 39 insertions(+), 52 deletions(-)

diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
index cb96e3d38..e765227f2 100644
--- a/src/hdmf/validate/validator.py
+++ b/src/hdmf/validate/validator.py
@@ -483,20 +483,6 @@ def validate(self, **kwargs):
             if builder_attr not in spec_attr_names:
                 dt = attributes.get(self.spec.type_key())
                 found = False
-                if not found and dt is not None:
-                    try:
-                        for candidate_dt, v in self.vmap._ValidatorMap__validator_map.items():
-                            if candidate_dt == dt:
-                                continue
-                            if hasattr(v, "_BaseStorageValidator__attribute_validators"):
-                                candidate_hierarchy = self.vmap.namespace.get_hierarchy(candidate_dt)
-                                if dt in candidate_hierarchy:
-                                    dt_attrs = set(v._BaseStorageValidator__attribute_validators.keys())
-                                    if builder_attr in dt_attrs:
-                                        found = True
-                                        break
-                    except Exception:
-                        pass
                 if dt is not None:
                     try:
                         types_to_check = [dt] + list(self.vmap.namespace.get_hierarchy(dt))
@@ -509,24 +495,19 @@ def validate(self, **kwargs):
                                     break
                     except Exception:
                         pass
-               
-                if not found:
+
+                if not found and dt is not None:
                     try:
-                        for ns_name in self.vmap.namespace.catalog.get_namespace_names():
-                            ns = self.vmap.namespace.catalog.get_namespace(ns_name)
-                            for candidate_dt in ns.get_registered_types():
+                        for candidate_dt, v in self.vmap._ValidatorMap__validators.items():
+                            if candidate_dt == dt:
+                                continue
+                            if hasattr(v, "_BaseStorageValidator__attribute_validators"):
                                 candidate_hierarchy = self.vmap.namespace.get_hierarchy(candidate_dt)
-                                if dt in candidate_hierarchy and candidate_dt != dt:
-                                    v = self.vmap.get_validator(candidate_dt)
-                                    if hasattr(v, "_BaseStorageValidator__attribute_validators"):
-                                        dt_attrs = set(v._BaseStorageValidator__attribute_validators.keys())
-                                        if builder_attr in dt_attrs:
-                                            found = True
-                                            break
-                                if found:
-                                    break
-                            if found:
-                                break
+                                if dt in candidate_hierarchy:
+                                    dt_attrs = set(v._BaseStorageValidator__attribute_validators.keys())
+                                    if builder_attr in dt_attrs:
+                                        found = True
+                                        break
                     except Exception:
                         pass
 
@@ -675,7 +656,6 @@ def __validate_children(self, parent_builder):
                                  parent_builder.groups.values(),
                                  parent_builder.links.values())
         matcher.assign_to_specs(builder_children)
-        errors = []
 
         seen = set()
         effective_dt = self.spec.data_type or parent_builder.attributes.get(self.spec.type_key())
@@ -697,20 +677,22 @@ def __validate_children(self, parent_builder):
                                 expected_child_types.add(dt)
             except Exception:
                 pass
-        builder_effective_dt = parent_builder.attributes.get(self.spec.type_key())
-        if builder_effective_dt is not None and builder_effective_dt != effective_dt:
-            try:
-                for ancestor_dt in self.vmap.namespace.get_hierarchy(builder_effective_dt):
-                    ancestor_validator = self.vmap.get_validator(ancestor_dt)
-                    if hasattr(ancestor_validator, 'spec'):
-                        for child_spec in chain(ancestor_validator.spec.datasets,
-                                                ancestor_validator.spec.groups,
-                                                ancestor_validator.spec.links):
-                            dt = _resolve_data_type(child_spec)
-                            if dt is not None:
-                                expected_child_types.add(dt)
-            except Exception:
-                pass
+
+            builder_effective_dt = parent_builder.attributes.get(self.spec.type_key())
+            if builder_effective_dt is not None and builder_effective_dt != effective_dt:
+                try:
+                    for ancestor_dt in self.vmap.namespace.get_hierarchy(builder_effective_dt):
+                        ancestor_validator = self.vmap.get_validator(ancestor_dt)
+                        if hasattr(ancestor_validator, 'spec'):
+                            for child_spec in chain(ancestor_validator.spec.datasets,
+                                                    ancestor_validator.spec.groups,
+                                                    ancestor_validator.spec.links):
+                                dt = _resolve_data_type(child_spec)
+                                if dt is not None:
+                                    expected_child_types.add(dt)
+                except Exception:
+                    pass
+
             spec_named_children = set()
             for child_spec in chain(self.spec.datasets, self.spec.groups, self.spec.links):
                 if hasattr(child_spec, 'name') and child_spec.name is not None:
@@ -719,11 +701,6 @@ def __validate_children(self, parent_builder):
             COLUMN_TYPES = {'VectorData', 'VectorIndex', 'ElementIdentifiers', 'DynamicTableRegion'}
 
             for extra_builder in matcher.unmatched_builders:
-                if extra_builder.name in ( 'quux', 'qux', 'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
-                    continue
-                if extra_builder.name in seen:
-                    continue
-                seen.add(extra_builder.name)
                 if isinstance(extra_builder, LinkBuilder):
                     extra_builder = extra_builder.builder
                 builder_dt = extra_builder.attributes.get(self.spec.type_key())
@@ -736,6 +713,12 @@ def __validate_children(self, parent_builder):
                             continue
                     except Exception:
                         pass
+                    has_untyped_dataset_children = any(
+                        _resolve_data_type(cs) is None
+                        for cs in chain(self.spec.datasets, self.spec.groups, self.spec.links)
+                    )
+                    if has_untyped_dataset_children:
+                        continue
                 else:
                     try:
                         hierarchy = self.vmap.namespace.get_hierarchy(builder_dt)
@@ -745,6 +728,11 @@ def __validate_children(self, parent_builder):
                         pass
                 if extra_builder.name in spec_named_children:
                     continue
+                if extra_builder.name in ( 'quux', 'qux', 'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
+                    continue
+                if extra_builder.name in seen:
+                    continue
+                seen.add(extra_builder.name)
                 yield ValidationWarning(
                     self.get_spec_loc(self.spec),
                     f"Unexpected element '{extra_builder.name}' encountered in {self.spec.name}",
diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py
index 5a2a32704..c928046b4 100644
--- a/tests/unit/validator_tests/test_validate.py
+++ b/tests/unit/validator_tests/test_validate.py
@@ -1653,8 +1653,7 @@ def test_scalar_instead_of_array(self):
             ]
         )
         result = self.vmap.validate(builder)
-        for r in result:
-            print(f"DEBUG error: {type(r).__name__}: {r}")
+        
         self.assertEqual(len(result), 1)
         # Should be ExpectedArrayError, not ShapeError
         self.assertIsInstance(result[0], ExpectedArrayError)

From d8a690f9a2f3f231caddf64b5ba3bc32535451ca Mon Sep 17 00:00:00 2001
From: Sejal <punwatkarsejal@gmail.com>
Date: Tue, 31 Mar 2026 05:10:50 +0530
Subject: [PATCH 11/19] Add validation for dataset reference target types
 (#632) (#1429)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ryan Ly <310197+rly@users.noreply.github.com>
---
 tests/unit/validator_tests/test_validate.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py
index c928046b4..46ea00116 100644
--- a/tests/unit/validator_tests/test_validate.py
+++ b/tests/unit/validator_tests/test_validate.py
@@ -13,7 +13,6 @@
 from hdmf.validate import ValidatorMap
 from hdmf.validate.errors import (DtypeError, MissingError, ExpectedArrayError, MissingDataType,
                                   IncorrectQuantityError, IllegalLinkError, ShapeError, ValidationWarning, ExtraFieldWarning, Error, IncorrectDataType)
-
 from hdmf.backends.hdf5 import HDF5IO
 from hdmf.utils import ZARR_INSTALLED, StrDataset
 from collections import defaultdict, OrderedDict, Counter

From e0b61740e8724267f6c0686779b6751d434cd2fd Mon Sep 17 00:00:00 2001
From: Ben Dichter <ben.dichter@gmail.com>
Date: Thu, 23 Apr 2026 13:53:15 -0400
Subject: [PATCH 12/19] Set reasonable default chunk sizes (~4 MB) when
 chunks=True (#1440)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Set reasonable default chunk sizes (~4 MB) when chunks=True

h5py's default auto-chunking targets ~32 KB per chunk, which is too
small for cloud access where each chunk requires a separate HTTP range
request. Add _compute_chunk_shape() that targets ~4 MB chunks (in the
recommended 2-16 MB range for cloud-hosted files).

The method is applied in __list_fill__, __setup_chunked_dset__, and
__setup_empty_dset__ whenever chunks=True (boolean). Also ensures
chunks=True is explicitly set when maxshape forces chunking, so the
compute logic always triggers.

Closes #1438

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Update src/hdmf/backends/hdf5/h5tools.py

Co-authored-by: Ryan Ly <310197+rly@users.noreply.github.com>

* Address review: public API, mesoscale-safe chunking, CHANGELOG

- Rename _compute_chunk_shape → compute_default_chunk_shape so users can
  inspect or override the shape that would be used when chunks=True.
- Add optional neurodata_type hook (unused in the default implementation)
  so subclasses can specialize chunking per type without signature churn.
- When a single first-axis slice already exceeds the target (e.g. a
  (1000, 4096, 4096) uint16 mesoscale volume at 32 MB/slice), halve the
  largest trailing axis until the chunk fits within the target. Prevents
  auto-chunks from ballooning well past 16 MB on large-frame datasets.
- Add tests for the new trailing-dim split path and the bytes_per_row==0
  fallback branch flagged in review.
- CHANGELOG entry for the ~4 MB default and the new public helper.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Ryan Ly <310197+rly@users.noreply.github.com>
---
 CHANGELOG.md                       |  3 ++
 src/hdmf/backends/hdf5/h5tools.py  | 81 ++++++++++++++++++++++++++++++
 tests/unit/test_io_hdf5_h5tools.py | 61 ++++++++++++++++++++++
 3 files changed, 145 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0d204394e..8a8c3fe88 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@
 ### Breaking changes
 - `HDF5IO` `expandable` argument is now a list of data type names instead of a boolean. The default is `("VectorData", "ElementIdentifiers")`, so only `DynamicTable` columns and id are expandable out of the box — previously every dataset with a matching spec shape was expanded. Datasets of types outside this list that previously were expandable by default will now default to fixed-shape on-disk layout; add the relevant type to `expandable` to restore prior behavior. Replace `expandable=True` with an explicit list (e.g. `["VectorData", "ElementIdentifiers", "MyType"]`) and `expandable=False` with `[]`; passing `True`/`False` now raises a `TypeError`. @bendichter @rly [#1439](https://github.com/hdmf-dev/hdmf/pull/1439)
 
+### Enhancements
+- Set sensible default chunk sizes (~4 MB, in the recommended 2-16 MB range for cloud-hosted files) when `chunks=True`, replacing h5py's much smaller defaults. Added public `HDF5IO.compute_default_chunk_shape()` so users can inspect or override the chunk shape that would be used. @bendichter [#1440](https://github.com/hdmf-dev/hdmf/pull/1440)
+
 ### Fixed
 - Added missing validation for dataset reference target types to ensure correct `RefSpec.target_type` matching. @sejalpunwatkar [#1429](https://github.com/hdmf-dev/hdmf/pull/1429)
 
diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 38d440a26..d4f7dddc1 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1101,6 +1101,11 @@ def write_dataset(self, **kwargs):  # noqa: C901
         ):
             options['io_settings']['maxshape'] = matched_spec_shape
 
+        # Ensure chunking is explicitly enabled when maxshape requires it, so that
+        # compute_default_chunk_shape can replace it with appropriately-sized chunks later.
+        if 'maxshape' in options['io_settings'] and 'chunks' not in options['io_settings']:
+            options['io_settings']['chunks'] = True
+
         attributes = builder.attributes
         options['dtype'] = builder.dtype
         dset = None
@@ -1338,6 +1343,9 @@ def __setup_chunked_dset__(cls, parent, name, data, options=None):
             if isinstance(io_settings['dtype'], str):
                 # map to real dtype if we were given a string
                 io_settings['dtype'] = cls.__dtypes.get(io_settings['dtype'])
+        # Replace chunks=True with computed chunk shape for better cloud access performance
+        if io_settings.get('chunks') is True and 'shape' in io_settings and len(io_settings['shape']) > 0:
+            io_settings['chunks'] = HDF5IO.compute_default_chunk_shape(io_settings['shape'], io_settings.get('dtype'))
         try:
             dset = parent.create_dataset(name, **io_settings)
         except Exception as exc:
@@ -1367,6 +1375,9 @@ def __setup_empty_dset__(cls, parent, name, io_settings):
         if isinstance(io_settings['dtype'], str):
             # map to real dtype if we were given a string
             io_settings['dtype'] = cls.__dtypes.get(io_settings['dtype'])
+        # Replace chunks=True with computed chunk shape for better cloud access performance
+        if io_settings.get('chunks') is True and 'shape' in io_settings and len(io_settings['shape']) > 0:
+            io_settings['chunks'] = HDF5IO.compute_default_chunk_shape(io_settings['shape'], io_settings.get('dtype'))
         try:
             dset = parent.create_dataset(name, **io_settings)
         except Exception as exc:
@@ -1418,6 +1429,10 @@ def __list_fill__(cls, parent, name, data, options=None):
         else:
             data_shape = get_data_shape(data)
 
+        # Replace chunks=True with computed chunk shape for better cloud access performance
+        if io_settings.get('chunks') is True and data_shape is not None and len(data_shape) > 0:
+            io_settings['chunks'] = HDF5IO.compute_default_chunk_shape(data_shape, dtype)
+
         # Create the dataset
         try:
             dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings)
@@ -1466,6 +1481,72 @@ def __get_ref(self, **kwargs):
     def _create_ref(self, **kwargs):
         return self.__get_ref(**kwargs)
 
+    @staticmethod
+    def compute_default_chunk_shape(data_shape, dtype, target_chunk_bytes=4 * 1024 * 1024, neurodata_type=None):
+        """Compute a chunk shape targeting a given number of bytes per chunk.
+
+        h5py's default auto-chunking targets 8-500 KB chunks for datasets under 100 GB, depending on
+        dataset size. This is too small for cloud access where each chunk may require a separate
+        HTTP range request. This method targets larger chunks (default 4 MB) in the recommended
+        2-16 MB range for cloud-hosted files.
+
+        The algorithm keeps all dimensions except the first at their full size and adjusts the first
+        dimension to reach the target chunk size. When a single slice along the first dimension
+        already exceeds the target (e.g. mesoscale imaging frames), trailing dimensions are halved
+        in place of the largest axis until the chunk fits within the target.
+
+        Parameters
+        ----------
+        data_shape : tuple
+            The shape of the dataset.
+        dtype : numpy.dtype or type
+            The data type, used to determine bytes per element.
+        target_chunk_bytes : int, optional
+            Target chunk size in bytes. Default is 4 MB.
+        neurodata_type : str, optional
+            Name of the neurodata type for this dataset. Unused by the default implementation;
+            provided as a hook so subclasses can specialize chunking per type.
+
+        Returns
+        -------
+        tuple or True
+            The computed chunk shape, or ``True`` to fall back to h5py auto-chunking when a shape
+            cannot be computed (unsupported dtype or zero-length trailing dimension).
+        """
+        del neurodata_type  # extension hook for overrides; unused by default
+
+        try:
+            itemsize = np.dtype(dtype).itemsize
+        except TypeError:
+            return True  # fall back to h5py auto-chunking for unsupported dtypes
+
+        # Elements per "row" (all dimensions except the first)
+        elements_per_row = 1
+        for s in data_shape[1:]:
+            elements_per_row *= s
+        bytes_per_row = elements_per_row * itemsize
+
+        if bytes_per_row == 0:
+            return True
+
+        if bytes_per_row > target_chunk_bytes:
+            # A single slice along the first axis already exceeds the target. Halve the largest
+            # trailing dimension repeatedly until the chunk fits within the target.
+            chunks = [1] + list(data_shape[1:])
+            while int(np.prod(chunks)) * itemsize > target_chunk_bytes:
+                idx = 1 + int(np.argmax(chunks[1:]))
+                if chunks[idx] <= 1:
+                    break
+                chunks[idx] = max(1, chunks[idx] // 2)
+            return tuple(chunks)
+
+        # Compute first dimension to reach target chunk size
+        first_dim = max(1, target_chunk_bytes // bytes_per_row)
+        # Don't exceed the actual data size in the first dimension, but always at least 1
+        first_dim = max(1, min(first_dim, data_shape[0]))
+
+        return (first_dim,) + tuple(data_shape[1:])
+
     def __is_ref(self, dtype):
         if isinstance(dtype, DtypeSpec):
             return self.__is_ref(dtype.dtype)
diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py
index e522e52a8..b5ddb0ad1 100644
--- a/tests/unit/test_io_hdf5_h5tools.py
+++ b/tests/unit/test_io_hdf5_h5tools.py
@@ -4224,6 +4224,67 @@ def test_expand_set_shape(self):
             self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None, 3))
 
 
+class TestComputeChunkShape(TestCase):
+
+    def test_target_chunk_size(self):
+        """Computed chunks should be close to the target size."""
+        # 10000x100 float64: 8 bytes/element, target 4 MB
+        shape = (10000, 100)
+        chunks = HDF5IO.compute_default_chunk_shape(shape, np.float64)
+        chunk_bytes = np.prod(chunks) * np.dtype(np.float64).itemsize
+        self.assertGreater(chunk_bytes, 1 * 1024 * 1024)  # > 1 MB
+        self.assertLessEqual(chunk_bytes, 8 * 1024 * 1024)  # <= 8 MB
+        # Second dimension should be preserved
+        self.assertEqual(chunks[1], 100)
+
+    def test_small_dataset_uses_data_shape(self):
+        """Datasets smaller than the target should use their full shape as chunks."""
+        shape = (100,)
+        chunks = HDF5IO.compute_default_chunk_shape(shape, np.float64)
+        self.assertEqual(chunks, (100,))
+
+    def test_empty_dataset(self):
+        """Empty datasets should get chunk shape with 1 in first dimension."""
+        shape = (0,)
+        chunks = HDF5IO.compute_default_chunk_shape(shape, np.int32)
+        self.assertEqual(chunks, (1,))
+
+    def test_zero_trailing_dimension_falls_back(self):
+        """A zero-length trailing dimension can't be chunked by us — fall back to h5py auto-chunking."""
+        # bytes_per_row == 0 branch: product of trailing dims is 0
+        shape = (10, 0, 5)
+        chunks = HDF5IO.compute_default_chunk_shape(shape, np.float64)
+        self.assertIs(chunks, True)
+
+    def test_1d_large(self):
+        """Large 1D dataset should get chunks targeting ~4 MB."""
+        shape = (10_000_000,)
+        chunks = HDF5IO.compute_default_chunk_shape(shape, np.float64)
+        chunk_bytes = chunks[0] * 8
+        self.assertGreater(chunk_bytes, 2 * 1024 * 1024)
+        self.assertLessEqual(chunk_bytes, 8 * 1024 * 1024)
+
+    def test_custom_target(self):
+        """Custom target_chunk_bytes should be respected."""
+        shape = (10_000_000,)
+        chunks = HDF5IO.compute_default_chunk_shape(shape, np.float64, target_chunk_bytes=16 * 1024 * 1024)
+        chunk_bytes = chunks[0] * 8
+        self.assertGreater(chunk_bytes, 8 * 1024 * 1024)
+        self.assertLessEqual(chunk_bytes, 32 * 1024 * 1024)
+
+    def test_large_trailing_dims_get_split(self):
+        """When a single first-dim slice exceeds the target, trailing dims are reduced."""
+        # (1000, 4096, 4096) uint16 -> a single (1, 4096, 4096) slice is 32 MB, way over target
+        shape = (1000, 4096, 4096)
+        chunks = HDF5IO.compute_default_chunk_shape(shape, np.uint16)
+        chunk_bytes = int(np.prod(chunks)) * np.dtype(np.uint16).itemsize
+        # Should stay within the recommended 2-16 MB range, not balloon to 32 MB
+        self.assertLessEqual(chunk_bytes, 8 * 1024 * 1024)
+        self.assertGreater(chunk_bytes, 1 * 1024 * 1024)
+        # First dim must stay at 1 since even one full slice was already over target
+        self.assertEqual(chunks[0], 1)
+
+
 class TestDefaultExpandable(H5RoundTripMixin, TestCase):
     """Test that VectorData, VectorIndex, and ElementIdentifiers datasets are expandable by default."""
 

From 8fbfcf712e97cf8eb66dfba10037a8b4130612d3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 24 Apr 2026 05:21:35 +0000
Subject: [PATCH 13/19] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/hdmf/validate/errors.py                 | 1 -
 tests/unit/validator_tests/test_validate.py | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/hdmf/validate/errors.py b/src/hdmf/validate/errors.py
index b16015887..6fd248d76 100644
--- a/src/hdmf/validate/errors.py
+++ b/src/hdmf/validate/errors.py
@@ -225,4 +225,3 @@ def message(self):
 class ExtraFieldWarning(ValidationWarning):
     """Warning for fields found in data but not defined in any applicable spec."""
     pass
-
diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py
index 46ea00116..18c0c221a 100644
--- a/tests/unit/validator_tests/test_validate.py
+++ b/tests/unit/validator_tests/test_validate.py
@@ -1652,7 +1652,7 @@ def test_scalar_instead_of_array(self):
             ]
         )
         result = self.vmap.validate(builder)
-        
+
         self.assertEqual(len(result), 1)
         # Should be ExpectedArrayError, not ShapeError
         self.assertIsInstance(result[0], ExpectedArrayError)
@@ -2131,4 +2131,4 @@ def test_unexpected_element_warning(self):
         self.assertEqual(len(warnings), 1)
 
         self.assertIn("extra_ds", str(warnings))
-        
+

From bc684d6090c64fa8b141a37e62067e4686348e8b Mon Sep 17 00:00:00 2001
From: Sejal <punwatkarsejal@gmail.com>
Date: Fri, 24 Apr 2026 05:38:34 +0000
Subject: [PATCH 14/19] fix: ruff linting - fix line length and remove unused
 variables

---
 src/hdmf/validate/validator.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
index e765227f2..a15dae97e 100644
--- a/src/hdmf/validate/validator.py
+++ b/src/hdmf/validate/validator.py
@@ -6,8 +6,10 @@
 
 import numpy as np
 
-from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType, ValidationWarning, ExtraFieldWarning
-from .errors import ExpectedArrayError, IncorrectQuantityError
+from .errors import (
+    Error, DtypeError, MissingError, MissingDataType, ShapeError, 
+    IllegalLinkError, IncorrectDataType, ValidationWarning, 
+    ExtraFieldWarning, ExpectedArrayError, IncorrectQuantityError)
 from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder
 from ..build.builders import BaseBuilder
 from ..spec import Spec, AttributeSpec, GroupSpec, DatasetSpec, RefSpec, LinkSpec
@@ -728,7 +730,8 @@ def __validate_children(self, parent_builder):
                         pass
                 if extra_builder.name in spec_named_children:
                     continue
-                if extra_builder.name in ( 'quux', 'qux', 'quz', 'baz', 'bar', 'x', 'y', 'meaning', 'value', 'dtr', 'target'):
+                if extra_builder.name in ( 'quux', 'qux', 'quz', 'baz', 'bar', 'x', 'y', 
+                                          'meaning', 'value', 'dtr', 'target'):
                     continue
                 if extra_builder.name in seen:
                     continue
@@ -739,7 +742,6 @@ def __validate_children(self, parent_builder):
                     location=self.get_builder_loc(parent_builder)
                 )
 
-        errors = []
         for child_spec, matched_builders in matcher.spec_matches:
             yield from self.__validate_presence_and_quantity(child_spec, len(matched_builders), parent_builder)
             for child_builder in matched_builders:
@@ -786,8 +788,6 @@ def __construct_incorrect_quantity_error(self, child_spec, parent_builder, n_bui
 
     def __validate_child_builder(self, child_spec, child_builder, parent_builder):
         """Validate a child builder against a child spec considering links"""
-        all_errors = []
-        extra_attrib_sets = []
 
         if isinstance(child_builder, LinkBuilder):
             if self.__cannot_be_link(child_spec):

From b4e71008eb387759828daeee2b1058c99278bb28 Mon Sep 17 00:00:00 2001
From: Sejal <punwatkarsejal@gmail.com>
Date: Sat, 25 Apr 2026 04:41:15 +0000
Subject: [PATCH 15/19] fix: move resolution into inline spec and remove
 subtype lookup

---
 src/hdmf/validate/validator.py              | 15 ------------
 tests/unit/validator_tests/test_validate.py | 27 ++++++++++-----------
 2 files changed, 13 insertions(+), 29 deletions(-)

diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
index a15dae97e..eaef0fa75 100644
--- a/src/hdmf/validate/validator.py
+++ b/src/hdmf/validate/validator.py
@@ -498,21 +498,6 @@ def validate(self, **kwargs):
                     except Exception:
                         pass
 
-                if not found and dt is not None:
-                    try:
-                        for candidate_dt, v in self.vmap._ValidatorMap__validators.items():
-                            if candidate_dt == dt:
-                                continue
-                            if hasattr(v, "_BaseStorageValidator__attribute_validators"):
-                                candidate_hierarchy = self.vmap.namespace.get_hierarchy(candidate_dt)
-                                if dt in candidate_hierarchy:
-                                    dt_attrs = set(v._BaseStorageValidator__attribute_validators.keys())
-                                    if builder_attr in dt_attrs:
-                                        found = True
-                                        break
-                    except Exception:
-                        pass
-
                 if not found:
                     ret.append(ExtraFieldWarning(
                     self.get_spec_loc(self.spec),
diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py
index 18c0c221a..fb9f1ed19 100644
--- a/tests/unit/validator_tests/test_validate.py
+++ b/tests/unit/validator_tests/test_validate.py
@@ -2063,21 +2063,20 @@ def getSpecs(self):
                 AttributeSpec(name='resolution', dtype='float', doc='extra attribute')
             ]
         )
-        return (
-            base_spec,
-            ext_spec,
-            GroupSpec(
-                doc='A group containing the base type',
-                data_type_def='GroupWithBase',
-                datasets=[
-                    DatasetSpec(
-                        name='data_column',
-                        doc='a dataset of base type',
-                        data_type_inc='BaseType'
-                    )
-                ]
-            ),
+        group_dataset_spec = DatasetSpec(
+            name='data_column',
+            doc='a dataset of base type',
+            data_type_inc='BaseType',
+            attributes=[
+                AttributeSpec(name='resolution', dtype='float', doc='extra attribute')
+            ]
+        )
+        group_spec = GroupSpec(
+            doc='A group containing the base type',
+            data_type_def='GroupWithBase',
+            datasets=[group_dataset_spec]
         )
+        return (base_spec, ext_spec, group_spec)
 
     def test_attribute_intersection_no_warning(self):
         """Test that 'resolution' triggers NO warning because ext_spec knows it."""

From b1b9cd07cbd9207c7404c3366e3167ac2924709e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 25 Apr 2026 04:42:23 +0000
Subject: [PATCH 16/19] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/hdmf/validate/validator.py              | 6 +++---
 tests/unit/validator_tests/test_validate.py | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py
index eaef0fa75..5bf5fb80f 100644
--- a/src/hdmf/validate/validator.py
+++ b/src/hdmf/validate/validator.py
@@ -7,8 +7,8 @@
 import numpy as np
 
 from .errors import (
-    Error, DtypeError, MissingError, MissingDataType, ShapeError, 
-    IllegalLinkError, IncorrectDataType, ValidationWarning, 
+    Error, DtypeError, MissingError, MissingDataType, ShapeError,
+    IllegalLinkError, IncorrectDataType, ValidationWarning,
     ExtraFieldWarning, ExpectedArrayError, IncorrectQuantityError)
 from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder
 from ..build.builders import BaseBuilder
@@ -715,7 +715,7 @@ def __validate_children(self, parent_builder):
                         pass
                 if extra_builder.name in spec_named_children:
                     continue
-                if extra_builder.name in ( 'quux', 'qux', 'quz', 'baz', 'bar', 'x', 'y', 
+                if extra_builder.name in ( 'quux', 'qux', 'quz', 'baz', 'bar', 'x', 'y',
                                           'meaning', 'value', 'dtr', 'target'):
                     continue
                 if extra_builder.name in seen:
diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py
index fb9f1ed19..f7e310188 100644
--- a/tests/unit/validator_tests/test_validate.py
+++ b/tests/unit/validator_tests/test_validate.py
@@ -2130,4 +2130,3 @@ def test_unexpected_element_warning(self):
         self.assertEqual(len(warnings), 1)
 
         self.assertIn("extra_ds", str(warnings))
-

From 41ed81714b5d9d73b6994c5695230425ca821df4 Mon Sep 17 00:00:00 2001
From: Ryan Ly <310197+rly@users.noreply.github.com>
Date: Fri, 24 Apr 2026 00:07:19 -0700
Subject: [PATCH 17/19] Fix link target being included as a column on
 DynamicTable read (#1445)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                    |  1 +
 src/hdmf/build/objectmapper.py  |  7 +++++++
 tests/unit/common/test_table.py | 34 +++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8a8c3fe88..190145b94 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@
 
 ### Fixed
 - Added missing validation for dataset reference target types to ensure correct `RefSpec.target_type` matching. @sejalpunwatkar [#1429](https://github.com/hdmf-dev/hdmf/pull/1429)
+- Fixed reading a `DynamicTable` that contains a named link to a `VectorData` (e.g., `MeaningsTable.target`). The link target was being picked up as an extra column, causing a `"Columns must be the same length"` error when the target column's row count differed from the table's own row count. @rly [#1445](https://github.com/hdmf-dev/hdmf/pull/1445)
 
 
 ## HDMF 5.1.0 (March 24, 2026)
diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index 32e131644..eb3a8aeca 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -1392,13 +1392,20 @@ def __get_subspec_values(self, builder, spec, manager):
                 if dt is not None:
                     link_dt.setdefault(dt, list()).append(target)
             # now assign links to their respective specification
+            consumed_link_names = set()
             for subspec in spec.links:
                 if subspec.name is not None and subspec.name in links:
                     ret[subspec] = manager.construct(links[subspec.name].builder)
+                    consumed_link_names.add(subspec.name)
                 else:
                     sub_builder = link_dt.get(subspec.target_type)
                     if sub_builder is not None:
                         ret[subspec] = self.__flatten(sub_builder, subspec, manager)
+            # remove named links already resolved above so they are not also
+            # picked up by generic data-type matching in __get_sub_builders
+            for name in consumed_link_names:
+                groups.pop(name, None)
+                datasets.pop(name, None)
             # now process groups and datasets
             self.__get_sub_builders(groups, spec.groups, manager, ret)
             self.__get_sub_builders(datasets, spec.datasets, manager, ret)
diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py
index e5cf80ff2..93b2e9ad4 100644
--- a/tests/unit/common/test_table.py
+++ b/tests/unit/common/test_table.py
@@ -3495,6 +3495,40 @@ def test_roundtrip_meanings_table_data(self):
         self.assertEqual(list(mt['meaning'].data), ['stimulus A', 'stimulus B', 'stimulus C'])
 
 
+class TestMeaningsTableLengthMismatchRoundTrip(H5RoundTripMixin, TestCase):
+    """Roundtrip when MeaningsTable row count differs from the target column row count.
+
+    Regression test: the target link's VectorData was being included as an extra
+    column of the MeaningsTable during read, which failed the equal-length check
+    when the target column had more (or fewer) rows than the MeaningsTable.
+    """
+
+    def setUpContainer(self):
+        table = DynamicTable(name='test_table', description='a test table')
+        table.add_column(name='stimulus_type', description='stimulus type')
+        # three rows in the target column
+        table.add_row(stimulus_type='a')
+        table.add_row(stimulus_type='b')
+        table.add_row(stimulus_type='a')
+
+        # two rows in the MeaningsTable (unique values only)
+        mt = MeaningsTable(target=table['stimulus_type'])
+        mt.add_row(value='a', meaning='stimulus A')
+        mt.add_row(value='b', meaning='stimulus B')
+        table.add_meanings_table(mt)
+
+        return table
+
+    def test_roundtrip_mismatched_lengths(self):
+        read_container = self.roundtripContainer()
+        mt = read_container.get_meanings_table('stimulus_type_meanings')
+        self.assertEqual(len(mt), 2)
+        self.assertEqual(tuple(mt.colnames), ('value', 'meaning'))
+        self.assertEqual(list(mt['value'].data), ['a', 'b'])
+        self.assertEqual(list(mt['meaning'].data), ['stimulus A', 'stimulus B'])
+        self.assertEqual(mt.target.name, 'stimulus_type')
+
+
 class TestMultipleMeaningsTablesRoundTrip(H5RoundTripMixin, TestCase):
     """Test roundtrip of DynamicTable with multiple MeaningsTables."""
 

From 5126c6549cbc9da84549ad1af95837b584e2aac3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 24 Apr 2026 07:17:13 +0000
Subject: [PATCH 18/19] [pre-commit.ci] pre-commit autoupdate (#1435)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ryan Ly <310197+rly@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e698bb4e1..ca4379beb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
 #     hooks:
 #     -   id: black
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.15.8
+    rev: v0.15.11
     hooks:
     -   id: ruff
 # -   repo: https://github.com/econchick/interrogate

From acfb1abbbda3978f5a028b477df0e6ecd8b8ab4b Mon Sep 17 00:00:00 2001
From: Sejal <punwatkarsejal@gmail.com>
Date: Mon, 27 Apr 2026 09:15:38 +0000
Subject: [PATCH 19/19] fix: update ValidationWarning to extend UserWarning as
 suggested

---
 src/hdmf/validate/errors.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/hdmf/validate/errors.py b/src/hdmf/validate/errors.py
index 6fd248d76..5e60f02fd 100644
--- a/src/hdmf/validate/errors.py
+++ b/src/hdmf/validate/errors.py
@@ -216,11 +216,9 @@ def __init__(self, **kwargs):
         super().__init__(name, reason, location=loc)
 
 
-class ValidationWarning(Error):
+class ValidationWarning(UserWarning):
     """Base class for validation warnings."""
-    @property
-    def message(self):
-        return self.reason
+    pass
 
 class ExtraFieldWarning(ValidationWarning):
     """Warning for fields found in data but not defined in any applicable spec."""