@@ -430,49 +430,61 @@ def __collect_nested_subspecs(self, spec: GroupSpec) -> list[BaseStorageSpec]:
430430 nested_subspecs .extend (self .__collect_nested_subspecs (subgroup_spec ))
431431 return nested_subspecs
432432
433- def __get_spec_dependencies (self , spec : BaseStorageSpec ) -> set [tuple [str , str ]]:
434- """Get the set of edges representing the dependencies of the given spec."""
433+ def __get_spec_dependencies (
434+ self , spec : BaseStorageSpec , catalog : SpecCatalog
435+ ) -> tuple [set [tuple [str , str ]], set [tuple [str , str ]]]:
436+ """Get edges representing the dependencies of the given spec.
437+
438+ Returns (edges, skipped_edges) where skipped_edges are cycle-creating edges
439+ that were excluded from the dependency graph.
440+ """
435441 edges = set ()
442+ skipped_edges = set ()
436443 if spec .data_type_inc is not None :
437- # The included spec should be resolved before this spec
438444 edges .add ((spec .data_type_def , spec .data_type_inc ))
439445 if isinstance (spec , GroupSpec ):
440- # For each nested subspec, the included specs of that nested subspec should be resolved before
441- # this spec
442446 nested_subspecs = self .__collect_nested_subspecs (spec )
443447 for subspec in nested_subspecs :
444448 if subspec .data_type_inc is not None :
445- # TODO: cycles are not yet supported
446- # if spec.data_type_def == subspec.data_type_inc:
447- # # Allow the simple case of a "cycle" where A contains B, and B includes A
448- # # but do not add this edge to the graph because it makes a cycle.
449- # continue
449+ if spec .data_type_def == subspec .data_type_inc :
450+ skipped_edges .add ((spec .data_type_def , subspec .data_type_inc ))
451+ continue
452+ hierarchy = catalog .get_hierarchy (subspec .data_type_inc )
453+ if spec .data_type_def in hierarchy :
454+ skipped_edges .add ((spec .data_type_def , subspec .data_type_inc ))
455+ continue
450456 edges .add ((spec .data_type_def , subspec .data_type_inc ))
451- return edges
457+ return edges , skipped_edges
458+
459+ def __resolve_local (
460+ self , namespace : SpecNamespace , spec : BaseStorageSpec , resolving_in_progress : set = None
461+ ) -> None :
462+ """Resolve the data_type_inc of this spec and its children recursively.
463+
464+ Parameters
465+ ----------
466+ namespace : SpecNamespace
467+ The namespace containing the specs.
468+ spec : BaseStorageSpec
469+ The spec to resolve.
470+ resolving_in_progress : set, optional
471+ Set of type names currently being resolved, used to prevent infinite recursion
472+ in circular dependencies.
473+ """
474+ if resolving_in_progress is None :
475+ resolving_in_progress = set ()
452476
453- def __resolve_local (self , namespace : SpecNamespace , spec : BaseStorageSpec ) -> None :
454477 if spec .data_type_inc is not None and not spec .inc_spec_resolved :
455- # NOTE: The included spec may have already been resolved into the current spec if the current spec
456- # was copied (included) from another spec. For example, if A has a subspec B that includes C, and
457- # D includes A, then when resolving D, first, already resolved subspec B is copied from A to D, and
458- # then resolve_local may be called on B again
478+ if spec .data_type_inc in resolving_in_progress :
479+ return
459480 included_spec = self .get_spec (namespace .name , spec .data_type_inc )
460-
461- # NOTE: In most cases, because we are resolving specs in topological order, the included spec
462- # should have already been resolved. However, in the case of the "cycle" described above where
463- # A contains B, and B includes A, then the included spec will not have been resolved yet.
464-
465- # Resolve the included spec into this spec
466481 spec .resolve_inc_spec (included_spec , namespace )
482+ resolving_in_progress = resolving_in_progress | {spec .data_type_inc }
467483
468484 if isinstance (spec , GroupSpec ):
469- # Recursively resolve all subspecs
470- nested_subspecs = self .__collect_nested_subspecs (spec )
471- for subspec in nested_subspecs :
472- self .__resolve_local (namespace , subspec )
485+ for subspec in list (spec .groups ) + list (spec .datasets ):
486+ self .__resolve_local (namespace , subspec , resolving_in_progress )
473487
474- # Mark this spec as resolved if the included spec has been resolved and all subspecs have been resolved.
475- # This is not necessary / not used anywhere, but may be useful for debugging.
476488 spec .resolved = True
477489
478490 def resolve_all_specs (self ) -> None :
@@ -481,38 +493,65 @@ def resolve_all_specs(self) -> None:
481493 self .__resolve_namespace_specs (namespace )
482494
483495 def __resolve_namespace_specs (self , namespace : SpecNamespace ) -> None :
484- """Resolve all specs in the catalog."""
485- # Build a graph of all type dependencies
486- # For example, if A includes B, A has subspec that includes C, and B includes D, then A -> B, A -> C, B -> D
496+ """Resolve all specs in the catalog using topological sort order.
497+
498+ A dependency graph is built from each type's data_type_inc and nested subspec data_type_inc values.
499+ Cycle-creating edges are skipped. The graph is then sorted topologically, and types at the same
500+ level are sorted alphabetically to ensure deterministic ordering across environments.
501+ """
502+ # Build dependency graph, skipping edges that would create cycles
487503 ts = graphlib .TopologicalSorter ()
488- specs_without_deps = set () # track specs that have no dependencies
504+ all_type_names = set ()
505+ all_skipped_edges = set ()
489506 for type_name in namespace .catalog .get_registered_types ():
507+ all_type_names .add (type_name )
490508 spec = namespace .catalog .get_spec (type_name )
491- edges = self .__get_spec_dependencies (spec )
492- if not edges :
493- specs_without_deps .add (type_name )
494- else :
495- for e in edges :
496- ts .add (* e )
497-
498- # Check for cycles and get static topological order
499- # For example, in the ABCD example above, the static order is D, B, C, A
500- try :
501- static_order = list (ts .static_order ())
502- except graphlib .CycleError : # pragma: no cover
503- # This should not happen because cycles will cause an error during spec object creation
504- raise ValueError ("Cycle detected in specification dependencies. Cannot resolve specifications." )
505-
506- # In rare cases, a namespace may have specs that have no dependencies and are not included by any other
507- # spec, so they will not be in the topological sort. Add them to the front of the order.
508- for s in specs_without_deps :
509+ edges , skipped_edges = self .__get_spec_dependencies (spec , namespace .catalog )
510+ all_skipped_edges .update (skipped_edges )
511+ for edge in edges :
512+ ts .add (* edge )
513+
514+ # For types that inherit from a type with skipped (cycle) edges, add dependencies on the
515+ # cycle partner. E.g., if DynamicTable -> MeaningsTable was skipped (cycle), and
516+ # AlignedDynamicTable extends DynamicTable, add AlignedDynamicTable -> MeaningsTable.
517+ # This ensures MeaningsTable is resolved before AlignedDynamicTable, so that when
518+ # AlignedDynamicTable inherits DynamicTable's MeaningsTable subspec and re-resolves it,
519+ # the catalog MeaningsTable is already fully resolved.
520+ # Walk the full inheritance hierarchy to handle longer chains (D extends C extends A).
521+ if all_skipped_edges :
522+ skipped_deps = dict ()
523+ for parent , dep in all_skipped_edges :
524+ skipped_deps .setdefault (parent , set ()).add (dep )
525+ for type_name in namespace .catalog .get_registered_types ():
526+ for ancestor in namespace .catalog .get_hierarchy (type_name ):
527+ if ancestor not in skipped_deps :
528+ continue
529+ for dep in skipped_deps [ancestor ]:
530+ if dep == type_name :
531+ continue
532+ # Only add if it won't create a new cycle
533+ if type_name not in namespace .catalog .get_hierarchy (dep ):
534+ ts .add (type_name , dep )
535+
536+ # Get topological order using level-by-level processing with alphabetical sorting
537+ # within each level to ensure deterministic ordering regardless of insertion order.
538+ ts .prepare ()
539+ static_order = []
540+ while ts .is_active ():
541+ ready = sorted (ts .get_ready ()) # alphabetical sort within each level
542+ static_order .extend (ready )
543+ for node in ready :
544+ ts .done (node )
545+
546+ # Add types that have no dependencies and are not depended on by any other type
547+ for s in all_type_names :
509548 if s not in static_order :
510549 static_order .insert (0 , s )
511550
512551 # Resolve specs in topological order
513552 for type_name in static_order :
514553 spec = self .get_spec (namespace .name , type_name )
515- self .__resolve_local (namespace , spec )
554+ self .__resolve_local (namespace , spec , resolving_in_progress = { type_name } )
516555
517556
518557 def __load_namespace (self , namespace , reader ):
@@ -558,42 +597,42 @@ def __load_namespace(self, namespace, reader):
558597
559598 return included_types
560599
561- def __register_type (self , ndt , inc_ns , catalog , registered_types , in_progress_registrations = None ):
600+ def __register_type (self , ndt , inc_ns , catalog , registered_types , registering_in_progress = None ):
562601 """Register a type and its dependencies from a namespace into a catalog.
563602
564603 :param ndt: The name of the data type to register
565604 :param inc_ns: The namespace containing the type
566605 :param catalog: The catalog to register the type into
567606 :param registered_types: Set of already registered types (to avoid re-registering)
568- :param in_progress_registrations : Set of types currently being registered (for circular dependency detection).
607+ :param registering_in_progress : Set of types currently being registered (for circular dependency detection).
569608 None if the registration process is starting.
570609 """
571- if in_progress_registrations is None :
572- in_progress_registrations = set ()
573- if ndt in registered_types or ndt in in_progress_registrations :
610+ if registering_in_progress is None :
611+ registering_in_progress = set ()
612+ if ndt in registered_types or ndt in registering_in_progress :
574613 # Already registered or currently being registered (circular dependency)
575614 return
576615 # Track that we're currently registering this type
577- in_progress_registrations .add (ndt )
616+ registering_in_progress .add (ndt )
578617 spec = inc_ns .get_spec (ndt )
579618 spec_file = inc_ns .catalog .get_spec_source_file (ndt )
580- self .__register_dependent_types (spec , inc_ns , catalog , registered_types , in_progress_registrations )
619+ self .__register_dependent_types (spec , inc_ns , catalog , registered_types , registering_in_progress )
581620 if isinstance (spec , DatasetSpec ):
582621 built_spec = self .dataset_spec_cls .build_spec (spec )
583622 else :
584623 built_spec = self .group_spec_cls .build_spec (spec )
585624 registered_types .add (ndt )
586625 catalog .register_spec (built_spec , spec_file )
587626
588- def __register_dependent_types (self , spec , inc_ns , catalog , registered_types , in_progress_registrations ):
627+ def __register_dependent_types (self , spec , inc_ns , catalog , registered_types , registering_in_progress ):
589628 """Ensure that all types used by this type are registered.
590629
591630 This handles:
592631 - Parent types (data_type_inc)
593632 - Nested type definitions (data_type_def in child specs)
594633 - Link target types
595634
596- Circular dependencies are handled by checking in_progress_registrations before
635+ Circular dependencies are handled by checking registering_in_progress before
597636 recursing. This allows patterns like:
598637 - Type A contains Type B, and Type B extends Type A
599638 - Type A contains a reference to Type A (self-reference)
@@ -602,23 +641,23 @@ def __register_dependent_types_helper(child_spec):
602641 if isinstance (child_spec , (GroupSpec , DatasetSpec )):
603642 if child_spec .data_type_inc is not None :
604643 self .__register_type (
605- child_spec .data_type_inc , inc_ns , catalog , registered_types , in_progress_registrations
644+ child_spec .data_type_inc , inc_ns , catalog , registered_types , registering_in_progress
606645 )
607646 if child_spec .data_type_def is not None : # nested type definition
608647 self .__register_type (
609- child_spec .data_type_def , inc_ns , catalog , registered_types , in_progress_registrations
648+ child_spec .data_type_def , inc_ns , catalog , registered_types , registering_in_progress
610649 )
611650 else : # spec is a LinkSpec
612651 self .__register_type (
613- child_spec .target_type , inc_ns , catalog , registered_types , in_progress_registrations
652+ child_spec .target_type , inc_ns , catalog , registered_types , registering_in_progress
614653 )
615654 if isinstance (child_spec , GroupSpec ):
616655 for nested_spec in (child_spec .groups + child_spec .datasets + child_spec .links ):
617656 __register_dependent_types_helper (nested_spec )
618657
619658 # Register parent type first
620659 if spec .data_type_inc is not None :
621- self .__register_type (spec .data_type_inc , inc_ns , catalog , registered_types , in_progress_registrations )
660+ self .__register_type (spec .data_type_inc , inc_ns , catalog , registered_types , registering_in_progress )
622661
623662 # Register types from child specs (groups, datasets, links)
624663 if isinstance (spec , GroupSpec ):
0 commit comments