Skip to content

Commit d46a901

Browse files
committed
Fix circular dependency handling in spec resolution (#1388)
Add tests Debug errors Fix and refactor stochastic resolution errors by not using graphlib static_order Refactor to ensure correct ordering Test
1 parent 1ad72a7 commit d46a901

3 files changed

Lines changed: 207 additions & 65 deletions

File tree

src/hdmf/spec/namespace.py

Lines changed: 103 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -430,49 +430,61 @@ def __collect_nested_subspecs(self, spec: GroupSpec) -> list[BaseStorageSpec]:
430430
nested_subspecs.extend(self.__collect_nested_subspecs(subgroup_spec))
431431
return nested_subspecs
432432

433-
def __get_spec_dependencies(self, spec: BaseStorageSpec) -> set[tuple[str, str]]:
434-
"""Get the set of edges representing the dependencies of the given spec."""
433+
def __get_spec_dependencies(
434+
self, spec: BaseStorageSpec, catalog: SpecCatalog
435+
) -> tuple[set[tuple[str, str]], set[tuple[str, str]]]:
436+
"""Get edges representing the dependencies of the given spec.
437+
438+
Returns (edges, skipped_edges) where skipped_edges are cycle-creating edges
439+
that were excluded from the dependency graph.
440+
"""
435441
edges = set()
442+
skipped_edges = set()
436443
if spec.data_type_inc is not None:
437-
# The included spec should be resolved before this spec
438444
edges.add((spec.data_type_def, spec.data_type_inc))
439445
if isinstance(spec, GroupSpec):
440-
# For each nested subspec, the included specs of that nested subspec should be resolved before
441-
# this spec
442446
nested_subspecs = self.__collect_nested_subspecs(spec)
443447
for subspec in nested_subspecs:
444448
if subspec.data_type_inc is not None:
445-
# TODO: cycles are not yet supported
446-
# if spec.data_type_def == subspec.data_type_inc:
447-
# # Allow the simple case of a "cycle" where A contains B, and B includes A
448-
# # but do not add this edge to the graph because it makes a cycle.
449-
# continue
449+
if spec.data_type_def == subspec.data_type_inc:
450+
skipped_edges.add((spec.data_type_def, subspec.data_type_inc))
451+
continue
452+
hierarchy = catalog.get_hierarchy(subspec.data_type_inc)
453+
if spec.data_type_def in hierarchy:
454+
skipped_edges.add((spec.data_type_def, subspec.data_type_inc))
455+
continue
450456
edges.add((spec.data_type_def, subspec.data_type_inc))
451-
return edges
457+
return edges, skipped_edges
458+
459+
def __resolve_local(
460+
self, namespace: SpecNamespace, spec: BaseStorageSpec, resolving_in_progress: set = None
461+
) -> None:
462+
"""Resolve the data_type_inc of this spec and its children recursively.
463+
464+
Parameters
465+
----------
466+
namespace : SpecNamespace
467+
The namespace containing the specs.
468+
spec : BaseStorageSpec
469+
The spec to resolve.
470+
resolving_in_progress : set, optional
471+
Set of type names currently being resolved, used to prevent infinite recursion
472+
in circular dependencies.
473+
"""
474+
if resolving_in_progress is None:
475+
resolving_in_progress = set()
452476

453-
def __resolve_local(self, namespace: SpecNamespace, spec: BaseStorageSpec) -> None:
454477
if spec.data_type_inc is not None and not spec.inc_spec_resolved:
455-
# NOTE: The included spec may have already been resolved into the current spec if the current spec
456-
# was copied (included) from another spec. For example, if A has a subspec B that includes C, and
457-
# D includes A, then when resolving D, first, already resolved subspec B is copied from A to D, and
458-
# then resolve_local may be called on B again
478+
if spec.data_type_inc in resolving_in_progress:
479+
return
459480
included_spec = self.get_spec(namespace.name, spec.data_type_inc)
460-
461-
# NOTE: In most cases, because we are resolving specs in topological order, the included spec
462-
# should have already been resolved. However, in the case of the "cycle" described above where
463-
# A contains B, and B includes A, then the included spec will not have been resolved yet.
464-
465-
# Resolve the included spec into this spec
466481
spec.resolve_inc_spec(included_spec, namespace)
482+
resolving_in_progress = resolving_in_progress | {spec.data_type_inc}
467483

468484
if isinstance(spec, GroupSpec):
469-
# Recursively resolve all subspecs
470-
nested_subspecs = self.__collect_nested_subspecs(spec)
471-
for subspec in nested_subspecs:
472-
self.__resolve_local(namespace, subspec)
485+
for subspec in list(spec.groups) + list(spec.datasets):
486+
self.__resolve_local(namespace, subspec, resolving_in_progress)
473487

474-
# Mark this spec as resolved if the included spec has been resolved and all subspecs have been resolved.
475-
# This is not necessary / not used anywhere, but may be useful for debugging.
476488
spec.resolved = True
477489

478490
def resolve_all_specs(self) -> None:
@@ -481,38 +493,65 @@ def resolve_all_specs(self) -> None:
481493
self.__resolve_namespace_specs(namespace)
482494

483495
def __resolve_namespace_specs(self, namespace: SpecNamespace) -> None:
484-
"""Resolve all specs in the catalog."""
485-
# Build a graph of all type dependencies
486-
# For example, if A includes B, A has subspec that includes C, and B includes D, then A -> B, A -> C, B -> D
496+
"""Resolve all specs in the catalog using topological sort order.
497+
498+
A dependency graph is built from each type's data_type_inc and nested subspec data_type_inc values.
499+
Cycle-creating edges are skipped. The graph is then sorted topologically, and types at the same
500+
level are sorted alphabetically to ensure deterministic ordering across environments.
501+
"""
502+
# Build dependency graph, skipping edges that would create cycles
487503
ts = graphlib.TopologicalSorter()
488-
specs_without_deps = set() # track specs that have no dependencies
504+
all_type_names = set()
505+
all_skipped_edges = set()
489506
for type_name in namespace.catalog.get_registered_types():
507+
all_type_names.add(type_name)
490508
spec = namespace.catalog.get_spec(type_name)
491-
edges = self.__get_spec_dependencies(spec)
492-
if not edges:
493-
specs_without_deps.add(type_name)
494-
else:
495-
for e in edges:
496-
ts.add(*e)
497-
498-
# Check for cycles and get static topological order
499-
# For example, in the ABCD example above, the static order is D, B, C, A
500-
try:
501-
static_order = list(ts.static_order())
502-
except graphlib.CycleError: # pragma: no cover
503-
# This should not happen because cycles will cause an error during spec object creation
504-
raise ValueError("Cycle detected in specification dependencies. Cannot resolve specifications.")
505-
506-
# In rare cases, a namespace may have specs that have no dependencies and are not included by any other
507-
# spec, so they will not be in the topological sort. Add them to the front of the order.
508-
for s in specs_without_deps:
509+
edges, skipped_edges = self.__get_spec_dependencies(spec, namespace.catalog)
510+
all_skipped_edges.update(skipped_edges)
511+
for edge in edges:
512+
ts.add(*edge)
513+
514+
# For types that inherit from a type with skipped (cycle) edges, add dependencies on the
515+
# cycle partner. E.g., if DynamicTable -> MeaningsTable was skipped (cycle), and
516+
# AlignedDynamicTable extends DynamicTable, add AlignedDynamicTable -> MeaningsTable.
517+
# This ensures MeaningsTable is resolved before AlignedDynamicTable, so that when
518+
# AlignedDynamicTable inherits DynamicTable's MeaningsTable subspec and re-resolves it,
519+
# the catalog MeaningsTable is already fully resolved.
520+
# Walk the full inheritance hierarchy to handle longer chains (D extends C extends A).
521+
if all_skipped_edges:
522+
skipped_deps = dict()
523+
for parent, dep in all_skipped_edges:
524+
skipped_deps.setdefault(parent, set()).add(dep)
525+
for type_name in namespace.catalog.get_registered_types():
526+
for ancestor in namespace.catalog.get_hierarchy(type_name):
527+
if ancestor not in skipped_deps:
528+
continue
529+
for dep in skipped_deps[ancestor]:
530+
if dep == type_name:
531+
continue
532+
# Only add if it won't create a new cycle
533+
if type_name not in namespace.catalog.get_hierarchy(dep):
534+
ts.add(type_name, dep)
535+
536+
# Get topological order using level-by-level processing with alphabetical sorting
537+
# within each level to ensure deterministic ordering regardless of insertion order.
538+
ts.prepare()
539+
static_order = []
540+
while ts.is_active():
541+
ready = sorted(ts.get_ready()) # alphabetical sort within each level
542+
static_order.extend(ready)
543+
for node in ready:
544+
ts.done(node)
545+
546+
# Add types that have no dependencies and are not depended on by any other type
547+
for s in all_type_names:
509548
if s not in static_order:
510549
static_order.insert(0, s)
511550

512551
# Resolve specs in topological order
513552
for type_name in static_order:
514553
spec = self.get_spec(namespace.name, type_name)
515-
self.__resolve_local(namespace, spec)
554+
self.__resolve_local(namespace, spec, resolving_in_progress={type_name})
516555

517556

518557
def __load_namespace(self, namespace, reader):
@@ -558,42 +597,42 @@ def __load_namespace(self, namespace, reader):
558597

559598
return included_types
560599

561-
def __register_type(self, ndt, inc_ns, catalog, registered_types, in_progress_registrations=None):
600+
def __register_type(self, ndt, inc_ns, catalog, registered_types, registering_in_progress=None):
562601
"""Register a type and its dependencies from a namespace into a catalog.
563602
564603
:param ndt: The name of the data type to register
565604
:param inc_ns: The namespace containing the type
566605
:param catalog: The catalog to register the type into
567606
:param registered_types: Set of already registered types (to avoid re-registering)
568-
:param in_progress_registrations: Set of types currently being registered (for circular dependency detection).
607+
:param registering_in_progress: Set of types currently being registered (for circular dependency detection).
569608
None if the registration process is starting.
570609
"""
571-
if in_progress_registrations is None:
572-
in_progress_registrations = set()
573-
if ndt in registered_types or ndt in in_progress_registrations:
610+
if registering_in_progress is None:
611+
registering_in_progress = set()
612+
if ndt in registered_types or ndt in registering_in_progress:
574613
# Already registered or currently being registered (circular dependency)
575614
return
576615
# Track that we're currently registering this type
577-
in_progress_registrations.add(ndt)
616+
registering_in_progress.add(ndt)
578617
spec = inc_ns.get_spec(ndt)
579618
spec_file = inc_ns.catalog.get_spec_source_file(ndt)
580-
self.__register_dependent_types(spec, inc_ns, catalog, registered_types, in_progress_registrations)
619+
self.__register_dependent_types(spec, inc_ns, catalog, registered_types, registering_in_progress)
581620
if isinstance(spec, DatasetSpec):
582621
built_spec = self.dataset_spec_cls.build_spec(spec)
583622
else:
584623
built_spec = self.group_spec_cls.build_spec(spec)
585624
registered_types.add(ndt)
586625
catalog.register_spec(built_spec, spec_file)
587626

588-
def __register_dependent_types(self, spec, inc_ns, catalog, registered_types, in_progress_registrations):
627+
def __register_dependent_types(self, spec, inc_ns, catalog, registered_types, registering_in_progress):
589628
"""Ensure that all types used by this type are registered.
590629
591630
This handles:
592631
- Parent types (data_type_inc)
593632
- Nested type definitions (data_type_def in child specs)
594633
- Link target types
595634
596-
Circular dependencies are handled by checking in_progress_registrations before
635+
Circular dependencies are handled by checking registering_in_progress before
597636
recursing. This allows patterns like:
598637
- Type A contains Type B, and Type B extends Type A
599638
- Type A contains a reference to Type A (self-reference)
@@ -602,23 +641,23 @@ def __register_dependent_types_helper(child_spec):
602641
if isinstance(child_spec, (GroupSpec, DatasetSpec)):
603642
if child_spec.data_type_inc is not None:
604643
self.__register_type(
605-
child_spec.data_type_inc, inc_ns, catalog, registered_types, in_progress_registrations
644+
child_spec.data_type_inc, inc_ns, catalog, registered_types, registering_in_progress
606645
)
607646
if child_spec.data_type_def is not None: # nested type definition
608647
self.__register_type(
609-
child_spec.data_type_def, inc_ns, catalog, registered_types, in_progress_registrations
648+
child_spec.data_type_def, inc_ns, catalog, registered_types, registering_in_progress
610649
)
611650
else: # spec is a LinkSpec
612651
self.__register_type(
613-
child_spec.target_type, inc_ns, catalog, registered_types, in_progress_registrations
652+
child_spec.target_type, inc_ns, catalog, registered_types, registering_in_progress
614653
)
615654
if isinstance(child_spec, GroupSpec):
616655
for nested_spec in (child_spec.groups + child_spec.datasets + child_spec.links):
617656
__register_dependent_types_helper(nested_spec)
618657

619658
# Register parent type first
620659
if spec.data_type_inc is not None:
621-
self.__register_type(spec.data_type_inc, inc_ns, catalog, registered_types, in_progress_registrations)
660+
self.__register_type(spec.data_type_inc, inc_ns, catalog, registered_types, registering_in_progress)
622661

623662
# Register types from child specs (groups, datasets, links)
624663
if isinstance(spec, GroupSpec):

tests/unit/spec_tests/test_load_namespace.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,109 @@ def test_self_referential_type(self):
675675
registered_types = core_ns.get_registered_types()
676676
self.assertIn('RecursiveContainer', registered_types)
677677

678+
def test_circular_dependency_multi_level_inheritance(self):
679+
"""Test loading types where A contains C, C extends B, and B extends A.
680+
681+
This tests that the cycle detection walks the full inheritance hierarchy,
682+
not just the direct parent.
683+
"""
684+
a_spec = GroupSpec(
685+
'Type A',
686+
data_type_def='TypeA',
687+
groups=[
688+
GroupSpec(
689+
'A collection of TypeC',
690+
data_type_inc='TypeC',
691+
quantity='*',
692+
)
693+
]
694+
)
695+
b_spec = GroupSpec(
696+
'Type B extends A',
697+
data_type_def='TypeB',
698+
data_type_inc='TypeA',
699+
)
700+
c_spec = GroupSpec(
701+
'Type C extends B (and thus transitively A)',
702+
data_type_def='TypeC',
703+
data_type_inc='TypeB',
704+
)
705+
706+
core_ns_builder = NamespaceBuilder('Core namespace', 'core', version='1.0.0')
707+
core_ns_builder.add_spec(self.core_source, a_spec)
708+
core_ns_builder.add_spec(self.core_source, b_spec)
709+
core_ns_builder.add_spec(self.core_source, c_spec)
710+
core_ns_builder.export(self.core_ns_path, outdir=self.tempdir)
711+
712+
ns_catalog = NamespaceCatalog()
713+
ns_catalog.load_namespaces(os.path.join(self.tempdir, self.core_ns_path))
714+
715+
core_ns = ns_catalog.get_namespace('core')
716+
registered_types = core_ns.get_registered_types()
717+
self.assertIn('TypeA', registered_types)
718+
self.assertIn('TypeB', registered_types)
719+
self.assertIn('TypeC', registered_types)
720+
721+
hierarchy = ns_catalog.get_hierarchy('core', 'TypeC')
722+
self.assertEqual(hierarchy, ('TypeC', 'TypeB', 'TypeA'))
723+
724+
def test_circular_dependency_subtype_inherits_from_circular_parent(self):
725+
"""Test loading a type that extends a type involved in a circular dependency.
726+
727+
This is the pattern that caused infinite recursion:
728+
- ParentTable contains ChildTable (circular dep)
729+
- ChildTable extends ParentTable (circular dep)
730+
- ExtendedTable extends ParentTable (inherits the circular dep)
731+
732+
Similar to AlignedDynamicTable extending DynamicTable which contains MeaningsTable.
733+
"""
734+
parent_table_spec = GroupSpec(
735+
'A parent table type',
736+
data_type_def='ParentTable',
737+
groups=[
738+
GroupSpec(
739+
'A collection of child tables',
740+
data_type_inc='ChildTable',
741+
quantity='*',
742+
)
743+
]
744+
)
745+
child_table_spec = GroupSpec(
746+
'A child table that extends ParentTable',
747+
data_type_def='ChildTable',
748+
data_type_inc='ParentTable',
749+
)
750+
extended_table_spec = GroupSpec(
751+
'An extended table that also extends ParentTable',
752+
data_type_def='ExtendedTable',
753+
data_type_inc='ParentTable',
754+
groups=[
755+
GroupSpec(
756+
'An additional group specific to ExtendedTable',
757+
data_type_inc='ParentTable',
758+
quantity='*',
759+
)
760+
]
761+
)
762+
763+
core_ns_builder = NamespaceBuilder('Core namespace', 'core', version='1.0.0')
764+
core_ns_builder.add_spec(self.core_source, parent_table_spec)
765+
core_ns_builder.add_spec(self.core_source, child_table_spec)
766+
core_ns_builder.add_spec(self.core_source, extended_table_spec)
767+
core_ns_builder.export(self.core_ns_path, outdir=self.tempdir)
768+
769+
ns_catalog = NamespaceCatalog()
770+
ns_catalog.load_namespaces(os.path.join(self.tempdir, self.core_ns_path))
771+
772+
core_ns = ns_catalog.get_namespace('core')
773+
registered_types = core_ns.get_registered_types()
774+
self.assertIn('ParentTable', registered_types)
775+
self.assertIn('ChildTable', registered_types)
776+
self.assertIn('ExtendedTable', registered_types)
777+
778+
hierarchy = ns_catalog.get_hierarchy('core', 'ExtendedTable')
779+
self.assertEqual(hierarchy, ('ExtendedTable', 'ParentTable'))
780+
678781
def test_link_circular_dependency(self):
679782
"""Test circular dependency through links."""
680783
# Create types where A links to B, and B extends A

tox.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ extras =
2828
gallery: docs
2929
optional: tqdm
3030
optional: sparse
31-
optional: zarr
31+
; optional: zarr
3232
optional: termset
3333
minimum: min-reqs
3434
commands =

0 commit comments

Comments
 (0)