Skip to content

improvement(utils): Use deque instead of list for BFS#147

Open
barucden wants to merge 1 commit into
Shoobx:masterfrom
barucden:bfs-complexity
Open

improvement(utils): Use deque instead of list for BFS#147
barucden wants to merge 1 commit into
Shoobx:masterfrom
barucden:bfs-complexity

Conversation

@barucden
Copy link
Copy Markdown

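list.pop(0) takes time linear in the length of the list, so the previous list-based BFS queue made the traversal quadratic in the worst case. This PR switches the queue to a collections.deque, whose popleft() runs in constant time.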

I used this program to measure the difference:

import time
from lxml import etree
from xmldiff import main as xmldiff

def make_xml(breadth, depth, tweak_last=False):
    """Build a synthetic XML document and return it parsed by `etree.fromstring`.

    Below a single `<root>` element, every element has `breadth` children
    named `n0`, `n1`, ... and the nesting goes `depth` levels deep. Each
    element carries a `v` attribute that encodes its path from the root.

    When `tweak_last` is true, the last child on the deepest level also
    gets `changed="true"`, so that two otherwise identical documents differ
    in exactly one place.
    """
    deepest_level = depth - 1
    final_index = breadth - 1

    def emit(level, path):
        # Yield the opening tag, the whole subtree, then the closing tag of
        # every child on this level, in document order.
        if level >= depth:
            return
        for index in range(breadth):
            child_path = f"{path}_{index}"
            attrs = f' v="{child_path}"'
            # Only the very last leaf is altered, to force a diff.
            if tweak_last and level == deepest_level and index == final_index:
                attrs += ' changed="true"'
            yield f'<n{index}{attrs}>'
            yield from emit(level + 1, child_path)
            yield f'</n{index}>'

    document = '\n'.join(['<root>', *emit(0, "r"), '</root>'])
    return etree.fromstring(document)

def main():
    """Time a single `xmldiff.diff_trees` call on two nearly identical trees.

    Both trees are built by `make_xml` with breadth 5 and depth 5; the
    second one is built with `tweak_last=True`. Prints the node count and
    the elapsed time of the diff call.
    """
    breadth = depth = 5

    # Root plus breadth**i generated elements on each level i = 1..depth.
    node_count = sum(breadth ** level for level in range(depth + 1))

    original = make_xml(breadth, depth)
    modified = make_xml(breadth, depth, tweak_last=True)

    # Only the diff itself is timed; tree construction stays outside.
    t0 = time.perf_counter()
    xmldiff.diff_trees(original, modified)
    t1 = time.perf_counter()

    print(f"nodes={node_count}  time={t1 - t0:.4f}s")

# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

On the benchmark above (breadth 5, depth 5, 3906 nodes), this branch ran approximately 300 ms faster than master.

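`list.pop(0)` takes time linear in the length of the list, so the previous list-based BFS queue made the traversal quadratic in the worst case. This PR switches the queue to a `collections.deque`, whose `popleft()` runs in constant time.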

I used this program to measure the difference:
```python
import time
from lxml import etree
from xmldiff import main as xmldiff

def make_xml(breadth, depth, tweak_last=False):
    """Build a synthetic XML document and return it parsed by `etree.fromstring`.

    Below a single `<root>` element, every element has `breadth` children
    named `n0`, `n1`, ... and the nesting goes `depth` levels deep. Each
    element carries a `v` attribute that encodes its path from the root.

    When `tweak_last` is true, the last child on the deepest level also
    gets `changed="true"`, so that two otherwise identical documents differ
    in exactly one place.
    """
    deepest_level = depth - 1
    final_index = breadth - 1

    def emit(level, path):
        # Yield the opening tag, the whole subtree, then the closing tag of
        # every child on this level, in document order.
        if level >= depth:
            return
        for index in range(breadth):
            child_path = f"{path}_{index}"
            attrs = f' v="{child_path}"'
            # Only the very last leaf is altered, to force a diff.
            if tweak_last and level == deepest_level and index == final_index:
                attrs += ' changed="true"'
            yield f'<n{index}{attrs}>'
            yield from emit(level + 1, child_path)
            yield f'</n{index}>'

    document = '\n'.join(['<root>', *emit(0, "r"), '</root>'])
    return etree.fromstring(document)

def main():
    """Time a single `xmldiff.diff_trees` call on two nearly identical trees.

    Both trees are built by `make_xml` with breadth 5 and depth 5; the
    second one is built with `tweak_last=True`. Prints the node count and
    the elapsed time of the diff call.
    """
    breadth = depth = 5

    # Root plus breadth**i generated elements on each level i = 1..depth.
    node_count = sum(breadth ** level for level in range(depth + 1))

    original = make_xml(breadth, depth)
    modified = make_xml(breadth, depth, tweak_last=True)

    # Only the diff itself is timed; tree construction stays outside.
    t0 = time.perf_counter()
    xmldiff.diff_trees(original, modified)
    t1 = time.perf_counter()

    print(f"nodes={node_count}  time={t1 - t0:.4f}s")

# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
```

On the benchmark above (breadth 5, depth 5, 3906 nodes), this branch ran approximately 300 ms faster than master.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant