2727#include < libevmasm/AssemblyItem.h>
2828#include < libevmasm/SemanticInformation.h>
2929
30+ #include < boost/container_hash/hash.hpp>
31+
3032#include < range/v3/algorithm/any_of.hpp>
33+ #include < range/v3/algorithm/equal.hpp>
3134
32- #include < functional>
33- #include < set>
35+ #include < unordered_set>
3436
3537using namespace solidity ;
3638using namespace solidity ::evmasm;
3739
3840
3941bool BlockDeduplicator::deduplicate ()
4042{
41- // Compares indices based on the suffix that starts there, ignoring tags and stopping at
42- // opcodes that stop the control flow.
43+ // Group basic blocks by a content hash and dedup within each bucket.
44+ // The hash and equality both walk a BlockIterator that ignores tags and stops at
45+ // opcodes that terminate control flow, replacing the block's own self-push by a
46+ // virtual tag so that recursive loops match.
4347
4448 // Virtual tag that signifies "the current block" and which is used to optimize loops.
4549 // We abort if this virtual tag actually exists.
@@ -51,48 +55,45 @@ bool BlockDeduplicator::deduplicate()
5155 return false ;
5256 }
5357
54- std::function<bool (size_t , size_t )> comparator = [&](size_t _i, size_t _j)
55- {
56- if (_i == _j)
57- return false ;
58-
59- // To compare recursive loops, we have to already unify PushTag opcodes of the
60- // block's own tag.
61- AssemblyItem pushFirstTag{pushSelf};
62- AssemblyItem pushSecondTag{pushSelf};
63-
64- if (_i < m_items.size () && m_items.at (_i).type () == Tag)
65- pushFirstTag = m_items.at (_i).pushTag ();
66- if (_j < m_items.size () && m_items.at (_j).type () == Tag)
67- pushSecondTag = m_items.at (_j).pushTag ();
68-
69- using diff_type = BlockIterator::difference_type;
70- BlockIterator first{m_items.begin () + diff_type (_i), m_items.end (), &pushFirstTag, &pushSelf};
71- BlockIterator second{m_items.begin () + diff_type (_j), m_items.end (), &pushSecondTag, &pushSelf};
72- BlockIterator end{m_items.end (), m_items.end ()};
58+ BlockIterator const end{m_items.end (), m_items.end ()};
7359
74- if (first != end && (*first).type () == Tag)
75- ++first;
76- if (second != end && (*second).type () == Tag)
77- ++second;
60+ // Returns a block iterator positioned at the start of the block body: the iterator is advanced once if the item at `_blockBegin` is a `Tag`.
61+ auto const blockBodyBegin = [&](std::size_t const _blockBegin, AssemblyItem const & _selfTagPush)
62+ {
63+ BlockIterator it{
64+ m_items.begin () + static_cast <BlockIterator::difference_type>(_blockBegin),
65+ m_items.end (),
66+ &_selfTagPush,
67+ &pushSelf
68+ };
69+ if (it != end && (*it).type () == Tag)
70+ ++it;
71+ return it;
72+ };
7873
79- return std::lexicographical_compare (first, end, second, end);
74+ auto const hashBlockAt = [&](std::size_t const _i)
75+ {
76+ return boost::hash_range (blockBodyBegin (_i, m_items[_i].pushTag ()), end);
77+ };
78+ auto const blocksAtEqual = [&](std::size_t const _i, std::size_t const _j)
79+ {
80+ return ranges::equal (
81+ blockBodyBegin (_i, m_items[_i].pushTag ()), end,
82+ blockBodyBegin (_j, m_items[_j].pushTag ()), end
83+ );
8084 };
8185
82- size_t iterations = 0 ;
86+ std:: size_t iterations = 0 ;
8387 for (; ; ++iterations)
8488 {
85- // @todo this should probably be optimized.
86- std::set<size_t , std::function<bool (size_t , size_t )>> blocksSeen (comparator);
87- for (size_t i = 0 ; i < m_items.size (); ++i)
89+ std::unordered_set<std::size_t , decltype (hashBlockAt), decltype (blocksAtEqual)> seen (0 , hashBlockAt, blocksAtEqual);
90+ for (std::size_t i = 0 ; i < m_items.size (); ++i)
8891 {
89- if (m_items. at (i) .type () != Tag)
92+ if (m_items[i] .type () != Tag)
9093 continue ;
91- auto it = blocksSeen.find (i);
92- if (it == blocksSeen.end ())
93- blocksSeen.insert (i);
94- else
95- m_replacedTags[m_items.at (i).data ()] = m_items.at (*it).data ();
94+ auto const [it, inserted] = seen.insert (i);
95+ if (!inserted)
96+ m_replacedTags[m_items[i].data ()] = m_items[*it].data ();
9697 }
9798
9899 if (!applyTagReplacement (m_items, m_replacedTags))
0 commit comments