Skip to content

Commit 3b7f0d6

Browse files
committed
chore: add some justifications for the diff
1 parent b57b9d2 commit 3b7f0d6

4 files changed

Lines changed: 3613 additions & 0 deletions

File tree

diff-update-graphs.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Semantic diff of two update-graph YAML files.
4+
5+
Treats the `channels` list as an unordered set keyed by (name, datastore metadata).
6+
Reports per-channel differences in nodes and edges.
7+
8+
Usage:
9+
python3 diff-update-graphs.py <old-graph.yaml> <new-graph.yaml>
10+
python3 diff-update-graphs.py # defaults to old-proposed-update-graph.yaml vs proposed-update-graph.yaml
11+
"""
12+
13+
import sys
14+
import yaml
15+
16+
17+
def load_graph(path):
    """Load an update-graph YAML file and return its parsed content.

    Args:
        path: Filesystem path of the YAML document to read.

    Returns:
        The parsed top-level object. An empty document (for which
        ``yaml.safe_load`` yields ``None``) is returned as an empty dict so
        that callers can always use ``.get`` on the result.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Pin the encoding so parsing does not depend on the platform locale.
    with open(path, encoding="utf-8") as f:
        graph = yaml.safe_load(f)
    return {} if graph is None else graph
20+
21+
22+
def index_channels(graph):
    """Index a graph's channels by ``(name, datastore)``.

    Args:
        graph: Parsed update graph; a mapping with an optional ``channels``
            list. ``None`` (an empty document) is treated as having no
            channels.

    Returns:
        A dict mapping ``(channel name, datastore)`` to the channel mapping.
        The datastore is read from ``channel["metadata"]["datastore"]`` and
        defaults to ``""`` when the metadata or the datastore is missing or
        null. If two channels share a key, the later one wins.

    Raises:
        KeyError: If a channel has no ``name``.
    """
    idx = {}
    # ``or`` guards cover keys that are present but null in the YAML, which
    # ``dict.get(key, default)`` alone does not.
    for ch in (graph or {}).get("channels") or []:
        metadata = ch.get("metadata") or {}
        datastore = metadata.get("datastore")
        if datastore is None:
            # Keep every key a (str, str) pair so the keys stay sortable.
            datastore = ""
        idx[(ch["name"], datastore)] = ch
    return idx
28+
29+
30+
def semver_sort_key(s):
    """Return a sort key that orders version-like strings numerically.

    The value is split on dots and dashes after stripping any leading ``v``
    characters. Each part becomes a ``(rank, value)`` pair: ``(0, int)`` for
    numeric parts and ``(1, str)`` for everything else. Tagging the parts
    keeps keys comparable with each other; comparing a bare ``int`` with a
    bare ``str`` would raise ``TypeError`` in Python 3.

    Args:
        s: Version identifier such as ``"v1.30.0-phase1"``. Non-string values
            are converted with ``str`` first.

    Returns:
        A list of ``(rank, value)`` tuples suitable as a ``key=`` result.
        Numeric parts sort before textual parts at the same position.
    """
    import re

    parts = re.split(r"[.\-]", str(s).lstrip("v"))
    # isdecimal() only accepts characters that int() can parse, unlike
    # isdigit(), which also accepts e.g. superscript digits.
    return [(0, int(p)) if p.isdecimal() else (1, p) for p in parts]
34+
35+
36+
def _channel_diff(oc, nc):
    """Compute node, edge and node-field differences for one channel pair.

    Args:
        oc: Channel mapping from the old graph.
        nc: Channel mapping from the new graph.

    Returns:
        A tuple ``(only_old_nodes, only_new_nodes, edge_diffs, field_diffs)``:
        sorted lists of node ids present on one side only, a dict mapping a
        source node to ``(removed_targets, added_targets)``, and a dict
        mapping a node id to ``{field: (old_value, new_value)}``.
    """
    # ``or`` guards cover keys that are present but null in the YAML.
    old_nodes = {n["id"]: n for n in oc.get("nodes") or []}
    new_nodes = {n["id"]: n for n in nc.get("nodes") or []}

    only_old_nodes = sorted(set(old_nodes) - set(new_nodes), key=semver_sort_key)
    only_new_nodes = sorted(set(new_nodes) - set(old_nodes), key=semver_sort_key)

    # Edge targets are compared as sets so their order in the file is ignored.
    old_edges = {k: set(v or ()) for k, v in (oc.get("edges") or {}).items()}
    new_edges = {k: set(v or ()) for k, v in (nc.get("edges") or {}).items()}

    edge_diffs = {}
    for fn in set(old_edges) | set(new_edges):
        oe = old_edges.get(fn, set())
        ne = new_edges.get(fn, set())
        only_old_e = sorted(oe - ne, key=semver_sort_key)
        only_new_e = sorted(ne - oe, key=semver_sort_key)
        if only_old_e or only_new_e:
            edge_diffs[fn] = (only_old_e, only_new_e)

    # Node field-level diffs for nodes present on both sides.
    field_diffs = {}
    for nid in sorted(set(old_nodes) & set(new_nodes), key=semver_sort_key):
        on = old_nodes[nid]
        nn = new_nodes[nid]
        # Sort the field names: iterating the raw set would make the printed
        # dict order vary between runs under hash randomization.
        diffs = {
            f: (on.get(f), nn.get(f))
            for f in sorted(set(on) | set(nn), key=str)
            if on.get(f) != nn.get(f)
        }
        if diffs:
            field_diffs[nid] = diffs

    return only_old_nodes, only_new_nodes, edge_diffs, field_diffs


def diff_graphs(old_path, new_path):
    """Print a semantic, order-agnostic diff of two update-graph YAML files.

    Channels are matched by ``(name, datastore)``. Channels present on only
    one side are listed first; every channel present on both sides is then
    reported as IDENTICAL or DIFFERS, with node, edge and node-field
    differences. A final line is printed when no difference was found.

    Args:
        old_path: Path of the old graph YAML file.
        new_path: Path of the new graph YAML file.

    Returns:
        None. The report is written to standard output.
    """
    old = load_graph(old_path)
    new = load_graph(new_path)

    old_ch = index_channels(old)
    new_ch = index_channels(new)

    old_keys = set(old_ch)
    new_keys = set(new_ch)

    print(f"Comparing:\n OLD: {old_path}\n NEW: {new_path}\n")

    only_old = sorted(old_keys - new_keys)
    only_new = sorted(new_keys - old_keys)

    if only_old:
        print("=== Channels only in OLD ===")
        for k in only_old:
            print(f" {k}")
        print()

    if only_new:
        print("=== Channels only in NEW ===")
        for k in only_new:
            print(f" {k}")
        print()

    print("=== Per-channel comparison ===")
    any_diff = False
    for key in sorted(old_keys & new_keys):
        only_old_nodes, only_new_nodes, edge_diffs, field_diffs = _channel_diff(
            old_ch[key], new_ch[key]
        )

        # Label is "<datastore>/<name>".
        channel_label = f"{key[1]}/{key[0]}"
        if not (only_old_nodes or only_new_nodes or edge_diffs or field_diffs):
            print(f"\n {channel_label}: IDENTICAL")
            continue

        any_diff = True
        print(f"\n {channel_label}: DIFFERS")
        if only_old_nodes:
            print(f" Nodes only in OLD: {only_old_nodes}")
        if only_new_nodes:
            print(f" Nodes only in NEW: {only_new_nodes}")
        for fn in sorted(edge_diffs, key=semver_sort_key):
            only_old_e, only_new_e = edge_diffs[fn]
            if only_old_e:
                print(f" Edge {fn} -> REMOVED targets: {only_old_e}")
            if only_new_e:
                print(f" Edge {fn} -> ADDED targets: {only_new_e}")
        # field_diffs was filled in sorted node order, so plain iteration
        # already yields the nodes in version order.
        for nid, diffs in field_diffs.items():
            print(f" Node {nid} field diffs: {diffs}")

    if not any_diff and not only_old and not only_new:
        print("\nGraphs are SEMANTICALLY EQUIVALENT (order-agnostic).")
121+
122+
123+
if __name__ == "__main__":
    # Accept either both paths or none; with no arguments, fall back to the
    # default file names in the current directory.
    if len(sys.argv) == 3:
        old_path, new_path = sys.argv[1], sys.argv[2]
    elif len(sys.argv) == 1:
        old_path = "old-proposed-update-graph.yaml"
        new_path = "proposed-update-graph.yaml"
    else:
        # sys.exit() with a string writes it to stderr and exits with
        # status 1, keeping the usage error out of the diff on stdout.
        sys.exit("Usage: diff-update-graphs.py [<old.yaml> <new.yaml>]")

    diff_graphs(old_path, new_path)

graph-diff-summary.md

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Update Graph Diff: old-proposed-update-graph.yaml vs proposed-update-graph.yaml
2+
3+
The graphs are **not fully equivalent**. Below is every difference found.
4+
5+
---
6+
7+
## `memory/stable`: IDENTICAL — no differences.
8+
9+
---
10+
11+
## All four DB channels: `v1.37.1` edge set differs
12+
13+
All of cockroachdb, mysql, postgres, and spanner are affected.
14+
15+
| Graph | `v1.37.1` edges to |
16+
|-------|-------------------|
17+
| Old | `v1.38.0, v1.39.1, v1.40.1, v1.42.1, v1.45.4, v1.47.1, v1.48.0, v1.49.2, v1.51.1` |
18+
| New | `v1.38.0` only |
19+
20+
The old code let `v1.37.1` skip past `v1.38.0`; the new waypoint algorithm blocks that
21+
because `v1.38.0` is now a mandatory stop (`waypoint: true`).
22+
23+
---
24+
25+
## Phase nodes have broader outgoing edges in the new graph
26+
27+
The old code treated phase nodes as strict "step through me to the immediate next version"
28+
nodes. The new waypoint algorithm allows phase nodes to reach any target up to (but not
29+
past) the next waypoint. Affected nodes:
30+
31+
- **`cockroachdb/stable`**: `v1.30.0-phase1`
32+
- Old: only `→ v1.30.0`
33+
- New: `→ v1.30.0` through `v1.36.2`
34+
- **`postgres/stable`**: `v1.14.0-phase2`
35+
- Old: only `→ v1.14.0`
36+
- New: `→ v1.14.0` through `v1.36.2`
37+
- **`spanner/stable`**: `v1.22.2-phase2` and `v1.29.5-phase1`
38+
- Old: each pointed only to the immediate next version
39+
- New: each can reach further (up to the `v1.38.0` waypoint)
40+
41+
---
42+
43+
## `spanner/stable`: `v1.51.1` node missing from old graph
44+
45+
The old graph encoded the latest spanner version as a quirk: node `v1.49.2` had `tag:
46+
v1.51.1`. The new graph correctly has `v1.49.2` with `tag: v1.49.2` and a separate
47+
`v1.51.1` node, which adds outgoing edges from all existing spanner nodes to `v1.51.1`.
48+
49+
---
50+
51+
## Node field differences
52+
53+
| Channel | Node | Field | Old value | New value |
54+
|---------|------|-------|-----------|-----------|
55+
| `cockroachdb/stable` | `v1.30.0-phase1` | `phase` | missing | `write-both-read-new` |
56+
| `spanner/stable` | `v1.29.5-phase1` | `phase` | missing | `write-both-read-new` |
57+
| `spanner/stable` | `v1.49.2` | `tag` | `v1.51.1` | `v1.49.2` |
58+
59+
---
60+
61+
## Bottom line
62+
63+
The graphs differ in four ways:
64+
65+
1. **`v1.37.1` edge narrowing** (all DB channels) — likely correct; `v1.38.0` is now a
66+
hard waypoint.
67+
2. **Phase-node outgoing edge expansion** — semantic change: old code was "phase node →
68+
immediate next release only"; new algorithm is "phase node → anything up to the next
69+
waypoint". Whether multi-hop skips from a phase node are safe depends on whether
70+
those intermediate versions require a migration stop.
71+
3. **`spanner/stable` `v1.51.1` node added** — likely fixes a bug in the old graph, where
72+
`v1.49.2` was carrying the wrong tag.
73+
4. **`phase` field missing on the `-phase1` nodes in old graph** — old serialization omitted the
74+
`phase` field from those nodes; new graph includes it.

0 commit comments

Comments
 (0)