@@ -106,3 +106,119 @@ def test_write_to_file(self, cli_runner, sbom_project, monkeypatch):
106106 if out_path .exists ():
107107 content = json .loads (out_path .read_text (encoding = "utf-8" ))
108108 assert isinstance (content , dict )
109+
110+
class TestSbomReachabilityGraph:
    """Hand-built-graph unit tests for the reverse-BFS reachability helpers.

    Pins the O(V+E)-per-matched-node reverse traversal (``_entry_ancestors`` /
    ``_trace_entry_reach``) against the historical per-(entry, node)
    ``nx.has_path`` semantics: a node is reachable iff SOME in-degree-0 entry
    has a path to it, and ``entry_points`` is the reaching-entry set in
    canonical entry order. Replaces the quadratic loop that timed out on the
    ~14k-symbol roam-code corpus (>45s).
    """

    @staticmethod
    def _graph():
        """Build the shared fixture graph used by most tests in this class.

        Layout (entries are the in-degree-0 nodes 1, 2 and 6)::

            1 -> 3 -> 4              3, 4 reachable from entry 1 only
            2 -> 5 -> 7 -> 8 -> 9    5, 7, 8, 9 reachable from entry 2 only
            6                        isolated entry; reaches only itself

        Nodes 7, 8 and 9 each have an incoming edge, so none of them is an
        entry; they are reachable solely through the chain rooted at entry 2.

        Returns:
            A fresh ``networkx.DiGraph`` (safe for the caller to mutate) whose
            nodes carry ``name``, ``qualified_name`` and ``file_path``
            attributes derived from the node id.
        """
        # Imported lazily so that merely collecting this module does not
        # require networkx.
        import networkx as nx

        G = nx.DiGraph()
        for nid in (1, 2, 3, 4, 5, 6, 7, 8, 9):
            G.add_node(
                nid,
                name=f"n{nid}",
                qualified_name=f"q{nid}",
                file_path=f"f{nid}.py",
            )
        G.add_edges_from([(1, 3), (3, 4), (2, 5), (5, 7), (7, 8), (8, 9)])
        return G

    def _entries(self, G):
        """Return the in-degree-0 nodes of ``G`` in node-iteration order."""
        return [n for n in G.nodes() if G.in_degree(n) == 0]

    def test_entries_are_indegree_zero(self):
        """Entries of the fixture are exactly the nodes with no incoming edge."""
        from roam.commands.cmd_sbom import _entry_ancestors

        G = self._graph()
        entries = self._entries(G)
        # 1, 2, 6 have no incoming edges.
        assert set(entries) == {1, 2, 6}
        # Sanity: helper agrees with the membership it filters against.
        assert _entry_ancestors(G, 4, set(entries)) == {1}

    def test_reverse_bfs_matches_has_path(self):
        """The reverse-BFS reaching set must equal the brute-force has_path set."""
        import networkx as nx

        from roam.commands.cmd_sbom import _entry_ancestors

        G = self._graph()
        entries = self._entries(G)
        entry_set = set(entries)
        for nid in G.nodes():
            # Brute-force oracle: every entry that has a forward path to nid.
            expected = {e for e in entries if nx.has_path(G, e, nid)}
            assert _entry_ancestors(G, nid, entry_set) == expected, f"mismatch at node {nid}"

    def test_entry_points_ordered_by_entry_id(self):
        """Reaching entries are reported as a list in canonical entry order."""
        from roam.commands.cmd_sbom import _trace_entry_reach

        G = self._graph()
        entries = self._entries(G)  # [1, 2, 6] in node-iteration (id) order
        # Node 9 is reachable only from entry 2 (2 -> 5 -> 7 -> 8 -> 9).
        assert _trace_entry_reach(G, entries, 9) == [2]
        # Node 4 is reachable only from entry 1.
        assert _trace_entry_reach(G, entries, 4) == [1]

        # The single-entry cases above cannot detect an ordering bug, so give
        # node 5 a second parent: 9 is then reachable from BOTH entry 1 and
        # entry 2 and the result must list them in entry order.
        G.add_edge(1, 5)
        entries = self._entries(G)
        # 5 already had an incoming edge, so the entry list is unchanged.
        assert entries == [1, 2, 6]
        assert _trace_entry_reach(G, entries, 9) == [1, 2]

    def test_entry_is_self_reachable(self):
        """An entry node that is itself the matched node is trivially reachable
        (parity with the old ``nx.has_path(G, eid, eid) is True``)."""
        from roam.commands.cmd_sbom import _entry_ancestors

        G = self._graph()
        entry_set = set(self._entries(G))
        assert _entry_ancestors(G, 6, entry_set) == {6}
        assert _entry_ancestors(G, 1, entry_set) == {1}

    def test_record_match_short_circuits_on_first_reachable(self):
        """``_record_match`` populates entry_points from the first reachable
        matched node and short-circuits afterward — preserve that exactly."""
        from roam.commands.cmd_sbom import _record_match

        G = self._graph()
        entries = self._entries(G)
        entry_set = set(entries)
        info = {"reachable": False, "entry_points": [], "matched_symbols": []}
        # First matched node 4 -> reachable from entry 1 (q1).
        _record_match(info, "q4", G, entries, 4, entry_set)
        assert info["reachable"] is True
        assert info["entry_points"] == ["q1"]
        # Second matched node 9 -> reachable from entry 2 (q2), but short-circuit
        # means entry_points is unchanged; matched_symbols still grows.
        _record_match(info, "q9", G, entries, 9, entry_set)
        assert info["entry_points"] == ["q1"]
        assert info["matched_symbols"] == ["q4", "q9"]

    def test_unreachable_node_reports_no_entries(self):
        """A matched node that no entry can reach leaves ``info`` unreachable."""
        import networkx as nx

        from roam.commands.cmd_sbom import _record_match

        # A node with no incoming path from any entry: a lone cycle with no
        # entry feeding it.
        G = nx.DiGraph()
        for nid in (1, 10, 11):
            G.add_node(
                nid,
                name=f"n{nid}",
                qualified_name=f"q{nid}",
                file_path=f"f{nid}.py",
            )
        # 10 <-> 11 cycle, neither is an entry (both have in-degree 1); 1 is an
        # isolated entry that does NOT reach the cycle.
        G.add_edges_from([(10, 11), (11, 10)])
        entries = [n for n in G.nodes() if G.in_degree(n) == 0]
        assert entries == [1]
        info = {"reachable": False, "entry_points": [], "matched_symbols": []}
        _record_match(info, "q10", G, entries, 10, set(entries))
        assert info["reachable"] is False
        assert info["entry_points"] == []
0 commit comments