|
| 1 | +"""State migration types and registry. |
| 2 | +
|
| 3 | +Realizes pipeline-utilities §10.12 (proposal 0014). A |
| 4 | +``StateMigration`` describes one edge in the migration graph; |
| 5 | +``MigrationRegistry`` holds the ordered set and resolves chains |
| 6 | +via BFS. Ambiguity (duplicate ``(from, to)`` pairs OR multiple |
| 7 | +distinct shortest paths between the same source/sink) is a |
| 8 | +configuration-style error per §10.12.1 / §10.12.2. |
| 9 | +""" |
| 10 | + |
| 11 | +from __future__ import annotations |
| 12 | + |
| 13 | +from collections import deque |
| 14 | +from collections.abc import Callable, Iterator |
| 15 | +from dataclasses import dataclass |
| 16 | +from typing import Any |
| 17 | + |
| 18 | + |
@dataclass(frozen=True)
class StateMigration:
    """A single directed edge of the state-migration graph.

    The edge leads from ``from_version`` to ``to_version`` and carries
    the ``migrate`` callable that performs the conversion.

    ``migrate`` is handed the most-deserialized form the backend can
    expose that does not yet depend on the current state class (a plain
    ``dict`` for JSON-backed backends). It MUST hand back a value of
    that same kind, so the result can feed either the next migration in
    the chain or the final deserialization into the current state class.

    Migrations MUST be pure: deterministic, free of I/O, and free of
    implicit state. Per spec §10.12.2 the framework does not police
    purity ("the contract is documented, not policed"); a migration
    that breaks the rule risks non-deterministic resume.
    """

    # Schema version this migration starts from.
    from_version: str
    # Schema version this migration leads to.
    to_version: str
    # Callable that converts a ``from_version`` payload into a
    # ``to_version`` payload of the same representation kind.
    migrate: Callable[[Any], Any]
| 38 | + |
| 39 | + |
class MigrationRegistry:
    """Ordered set of registered migrations + BFS chain resolution.

    Registration-time invariants:

    - Two migrations with the same ``from_version`` AND
      ``to_version`` raise ``ValueError`` (chain ambiguity per
      §10.12.1).
    - Two migrations with the same ``from_version`` and different
      ``to_version`` are permitted (branched migration graph;
      chain resolution picks a path).

    Resolution-time semantics (per §10.12.2):

    - BFS from ``record.schema_version`` to
      ``current.schema_version``. BFS naturally finds the shortest
      path.
    - Empty registry on mismatch → no path → caller raises
      ``CheckpointStateMigrationMissing``.
    - Non-empty registry with no connecting path → same.
    - Found a unique shortest path → return ordered list.
    - Found multiple distinct shortest paths (same edge count,
      different edge sequences) → raise ``ValueError`` per
      §10.12.2's ambiguous-chain rule. Spec accepts load-time
      detection.
    """

    def __init__(self) -> None:
        # Every registered migration keyed by its (from, to) pair; used
        # for duplicate detection and for registration-order iteration.
        self._migrations: dict[tuple[str, str], StateMigration] = {}
        # Adjacency list: from_version -> outgoing migrations, kept in
        # registration order.
        self._edges: dict[str, list[StateMigration]] = {}

    def register(self, migration: StateMigration) -> None:
        """Add ``migration`` to the registry.

        Raises ``ValueError`` if a migration with the same
        ``(from_version, to_version)`` pair is already registered.
        """
        key = (migration.from_version, migration.to_version)
        if key in self._migrations:
            raise ValueError(
                f"duplicate state migration {migration.from_version!r}→"
                f"{migration.to_version!r} registered; chain would be ambiguous"
            )
        self._migrations[key] = migration
        self._edges.setdefault(migration.from_version, []).append(migration)

    def __iter__(self) -> Iterator[StateMigration]:
        """Iterate over the registered migrations in registration order."""
        return iter(self._migrations.values())

    def __len__(self) -> int:
        """Return the number of registered migrations."""
        return len(self._migrations)

    def resolve_chain(
        self,
        from_version: str,
        to_version: str,
    ) -> list[StateMigration] | None:
        """Return an ordered chain of migrations bridging the two
        versions, or ``None`` if no chain exists.

        Returns an empty list when the two versions are equal.

        Raises ``ValueError`` if multiple distinct shortest paths
        exist (ambiguous chain per §10.12.2).
        """
        if from_version == to_version:
            return []

        distances, incoming = self._shortest_path_edges(from_version, to_version)
        if to_version not in distances:
            return None

        # Walk back from the target along shortest-path arrival edges.
        # The shortest chain is unique exactly when every version on
        # the way back has a single arrival edge; a second arrival edge
        # anywhere on that walk yields a second, distinct shortest path.
        chain: list[StateMigration] = []
        version = to_version
        while version != from_version:
            arrivals = incoming[version]
            if len(arrivals) > 1:
                shortest_paths = self._enumerate_shortest_paths(
                    from_version, to_version, distances, incoming
                )
                descriptions = [
                    " → ".join([from_version, *(e.to_version for e in p)])
                    for p in shortest_paths
                ]
                raise ValueError(
                    f"ambiguous migration chain from {from_version!r} to "
                    f"{to_version!r}: multiple distinct shortest paths exist "
                    f"({descriptions}); register fewer migrations or pick a "
                    f"single canonical route"
                )
            edge = arrivals[0]
            chain.append(edge)
            version = edge.from_version
        chain.reverse()
        return chain

    def _shortest_path_edges(
        self,
        from_version: str,
        to_version: str,
    ) -> tuple[dict[str, int], dict[str, list[StateMigration]]]:
        """Run one BFS from ``from_version``, stopping at ``to_version``.

        Returns ``(distances, incoming)``: ``distances`` maps each
        discovered version to its edge count from ``from_version``;
        ``incoming`` maps each discovered version (other than the
        source) to the edges that reach it at exactly that distance.
        Each version is expanded at most once, so the cost does not
        depend on how many distinct paths the graph contains.
        """
        distances: dict[str, int] = {from_version: 0}
        incoming: dict[str, list[StateMigration]] = {}
        queue: deque[str] = deque([from_version])
        while queue:
            version = queue.popleft()
            if version == to_version:
                # Versions leave the queue in non-decreasing distance
                # order, so every version closer than the target has
                # already been expanded and the arrival lists on all
                # shortest routes to the target are complete.
                break
            next_depth = distances[version] + 1
            for edge in self._edges.get(version, ()):
                seen_at = distances.get(edge.to_version)
                if seen_at is None:
                    distances[edge.to_version] = next_depth
                    incoming[edge.to_version] = [edge]
                    queue.append(edge.to_version)
                elif seen_at == next_depth:
                    # Another equally short way into an already
                    # discovered version.
                    incoming[edge.to_version].append(edge)
                # Otherwise the edge leads back to a closer version and
                # cannot be part of any shortest path.
        return distances, incoming

    def _enumerate_shortest_paths(
        self,
        from_version: str,
        to_version: str,
        distances: dict[str, int],
        incoming: dict[str, list[StateMigration]],
    ) -> list[list[StateMigration]]:
        """List every shortest path to ``to_version`` for error reporting.

        Only called once ambiguity is established. Paths are ordered by
        the registration order of their edges, first hop first.
        """
        # Versions lying on at least one shortest route to the target.
        on_route = {to_version}
        pending = [to_version]
        while pending:
            version = pending.pop()
            for edge in incoming.get(version, ()):
                if edge.from_version not in on_route:
                    on_route.add(edge.from_version)
                    pending.append(edge.from_version)

        # Extend the partial paths one layer at a time, following only
        # edges that stay on a shortest route; after the last layer
        # every surviving path ends at the target.
        layer: list[tuple[str, list[StateMigration]]] = [(from_version, [])]
        for depth in range(1, distances[to_version] + 1):
            layer = [
                (edge.to_version, path + [edge])
                for version, path in layer
                for edge in self._edges.get(version, ())
                if edge.to_version in on_route
                and distances[edge.to_version] == depth
            ]
        return [path for _, path in layer]

    def describe(self) -> str:
        """Human-readable description of the registered set, used
        in the ``CheckpointStateMigrationMissing`` error payload.
        Empty registry returns ``"<no migrations registered>"``.
        """
        if not self._migrations:
            return "<no migrations registered>"
        return "\n".join(f"{m.from_version} → {m.to_version}" for m in self._migrations.values())
| 161 | + |
| 162 | + |
# Names exported by ``from <this module> import *``.
__all__ = ["MigrationRegistry", "StateMigration"]
0 commit comments