Skip to content

Commit f827fd2

Browse files
jskladanclaude
andcommitted
perf(dependency_graph): optimize remove_dependency to avoid full-graph scans
- Replace recursive orphan removal with iterative BFS - Use direct parent/child references instead of scanning all nodes - Add tests for diamond cascade and cross-subtree survival Signed-off-by: Josef Skladanka <jskladan@redhat.com> Co-Authored-By: Claude Code <noreply@anthropic.com>
1 parent 1f8c31c commit f827fd2

2 files changed

Lines changed: 108 additions & 29 deletions

File tree

src/fromager/dependency_graph.py

Lines changed: 44 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import collections
34
import dataclasses
45
import graphlib
56
import json
@@ -349,7 +350,7 @@ def remove_dependency(
349350
"""Remove a dependency node and any orphaned descendants from the graph.
350351
351352
Removes the node and all edges pointing to it. Child nodes that have
352-
no remaining parents after removal are recursively removed as well.
353+
no remaining parents after removal are iteratively removed as well.
353354
354355
Args:
355356
req_name: Canonical name of the package
@@ -360,36 +361,50 @@ def remove_dependency(
360361
logger.debug(f"Cannot remove {key} - not in graph")
361362
return
362363

363-
logger.debug(f"Removing failed dependency {key} from graph")
364+
queue: collections.deque[str] = collections.deque([key])
364365

365-
deleted_node = self.nodes[key]
366-
367-
children = [edge.destination_node for edge in deleted_node.children]
368-
369-
# Clean up back-references (parents) in nodes that were children of the removed node
370-
for child_edge in deleted_node.children:
371-
child_node = child_edge.destination_node
372-
filtered_parents = [
373-
edge for edge in child_node.parents if edge.destination_node.key != key
374-
]
375-
child_node.parents.clear()
376-
child_node.parents.extend(filtered_parents)
377-
378-
# Remove the node itself
379-
del self.nodes[key]
380-
381-
# Remove forward edges from any node whose children pointed to the removed node
382-
for node in self.nodes.values():
383-
filtered_children = [
384-
edge for edge in node.children if edge.destination_node.key != key
385-
]
386-
node.children.clear()
387-
node.children.extend(filtered_children)
366+
# BFS over orphaned descendants
367+
while queue:
368+
key = queue.popleft()
369+
# This is a defensive check, should not happen in normal operation
370+
if key not in self.nodes:
371+
logger.debug(f"Cannot remove {key} - not in graph")
372+
continue
388373

389-
# Recursively remove children that have no remaining parents
390-
for child in children:
391-
if child.key != ROOT and child.key in self.nodes and not child.parents:
392-
self.remove_dependency(child.canonicalized_name, child.version)
374+
logger.debug(f"Removing failed dependency {key} from graph")
375+
376+
deleted_node = self.nodes[key]
377+
378+
# Remove references to this node from its direct children
379+
children = []
380+
for child_edge in deleted_node.children:
381+
child_node = child_edge.destination_node
382+
filtered_parents = [
383+
edge
384+
for edge in child_node.parents
385+
if edge.destination_node.key != key
386+
]
387+
child_node.parents.clear()
388+
child_node.parents.extend(filtered_parents)
389+
children.append(child_node)
390+
391+
# Remove references to this node from its direct parents
392+
for parent_edge in deleted_node.parents:
393+
parent_node = parent_edge.destination_node
394+
filtered_children = [
395+
edge
396+
for edge in parent_node.children
397+
if edge.destination_node.key != key
398+
]
399+
parent_node.children.clear()
400+
parent_node.children.extend(filtered_children)
401+
402+
del self.nodes[key]
403+
404+
# Enqueue children that have become orphans
405+
for child in children:
406+
if child.key != ROOT and child.key in self.nodes and not child.parents:
407+
queue.append(child.key)
393408

394409
def get_dependency_edges(
395410
self, match_dep_types: list[RequirementType] | None = None

tests/test_dependency_graph.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,70 @@ def test_remove_dependency_mid_graph_cascades() -> None:
633633
assert node_c.parents[0].destination_node.key == "a==1.0"
634634

635635

636+
def test_remove_dependency_shared_child_kept_by_other_subtree() -> None:
637+
"""Shared child and its descendants survive when kept alive by another subtree.
638+
639+
ROOT -> a -> b -> d
640+
a -> c -> d
641+
ROOT -> e -> c
642+
643+
Removing a should remove a, b; c and d survive because e still parents c.
644+
"""
645+
graph = _build_graph(
646+
("ROOT", "a", "toplevel"),
647+
("ROOT", "e", "toplevel"),
648+
("a", "b", "install"),
649+
("a", "c", "install"),
650+
("b", "d", "install"),
651+
("c", "d", "install"),
652+
("e", "c", "install"),
653+
)
654+
655+
graph.remove_dependency(canonicalize_name("a"), Version("1.0"))
656+
657+
assert "a==1.0" not in graph.nodes
658+
assert "b==1.0" not in graph.nodes
659+
assert "c==1.0" in graph.nodes
660+
assert "d==1.0" in graph.nodes
661+
assert "e==1.0" in graph.nodes
662+
663+
node_c = graph.nodes["c==1.0"]
664+
assert len(node_c.parents) == 1
665+
assert node_c.parents[0].destination_node.key == "e==1.0"
666+
assert len(node_c.children) == 1
667+
assert node_c.children[0].destination_node.key == "d==1.0"
668+
669+
node_d = graph.nodes["d==1.0"]
670+
assert len(node_d.parents) == 1
671+
assert node_d.parents[0].destination_node.key == "c==1.0"
672+
673+
674+
def test_remove_dependency_cascades_through_diamond() -> None:
675+
"""Single removal cascades through a diamond, removing all orphans.
676+
677+
ROOT -> a -> b -> d
678+
a -> c -> d
679+
680+
Removing a orphans b and c; as both are removed, d loses all parents
681+
and is also removed.
682+
"""
683+
graph = _build_graph(
684+
("ROOT", "a", "toplevel"),
685+
("a", "b", "install"),
686+
("a", "c", "install"),
687+
("b", "d", "install"),
688+
("c", "d", "install"),
689+
)
690+
691+
graph.remove_dependency(canonicalize_name("a"), Version("1.0"))
692+
693+
assert "a==1.0" not in graph.nodes
694+
assert "b==1.0" not in graph.nodes
695+
assert "c==1.0" not in graph.nodes
696+
assert "d==1.0" not in graph.nodes
697+
assert len(graph) == 0
698+
699+
636700
def test_remove_dependency_nonexistent() -> None:
637701
"""Removing a node not in the graph is a no-op."""
638702
graph = _build_graph(("ROOT", "a", "toplevel"))

0 commit comments

Comments
 (0)