Skip to content

Commit 6b2fd3c

Browse files
authored
Qualify function names (#678)
Closes #620
1 parent 194a440 commit 6b2fd3c

11 files changed

Lines changed: 275 additions & 7 deletions

File tree

kmir/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ description = ""
99
requires-python = "~=3.10"
1010
dependencies = [
1111
"kframework==v7.1.286",
12+
"rust-demangler==1.0",
1213
]
1314

1415
[[project.authors]]

kmir/src/kmir/hello.py

Lines changed: 0 additions & 2 deletions
This file was deleted.

kmir/src/kmir/linker.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
from __future__ import annotations
22

33
import logging
4+
from itertools import chain
45
from math import ceil, log10
56
from typing import TYPE_CHECKING
67

78
from .smir import SMIRInfo
89

910
if TYPE_CHECKING:
11+
from collections.abc import Iterator
1012
from typing import Final
1113

1214
_LOGGER: Final = logging.getLogger(__name__)
@@ -20,6 +22,8 @@ def link(smirs: list[SMIRInfo]) -> SMIRInfo:
2022
_LOGGER.info(f'Maximum type ID (offset) is {offset}, linking {len(smirs)} smir.json files')
2123

2224
for i, smir in enumerate(smirs):
25+
qualify_items(smir)
26+
2327
smir_offset = offset * i
2428
_LOGGER.debug(f'Offset {smir_offset} for smir {smir._smir["name"]}')
2529
apply_offset(smir, smir_offset)
@@ -42,6 +46,135 @@ def id_range(smir: SMIRInfo) -> int:
4246
return max(0, *smir.function_symbols, *smir.types, *smir.spans, *smir.allocs)
4347

4448

49+
def qualify_items(info: SMIRInfo) -> None:
50+
"""Qualify each unqualified function item name.
51+
52+
The missing prefix is extracted from the symbol name.
53+
"""
54+
55+
for item in info._smir['items']:
56+
match item:
57+
case {
58+
'symbol_name': symbol_name,
59+
'mono_item_kind': {
60+
'MonoItemFn': {
61+
'name': name,
62+
} as mono_item_fn,
63+
},
64+
}:
65+
qualified_name = _mono_item_fn_name(symbol_name=symbol_name, name=name)
66+
if qualified_name != name:
67+
_LOGGER.info(f'Qualified item {symbol_name!r}: {name} -> {qualified_name}')
68+
mono_item_fn['name'] = qualified_name
69+
70+
71+
def _mono_item_fn_name(symbol_name: str, name: str) -> str:
    """Extend ``name`` with a prefix from ``symbol_name``.

    The mangled ``symbol_name`` is demangled, and both the demangled symbol and
    ``name`` are split into path segments. The function name is the identifier
    prefix of the last symbol segment (ignoring the trailing hash segment) that
    starts with a valid identifier. With ``symbol_index`` the position of that
    segment in the symbol and ``name_index`` the last position of the function
    name in ``name``, the first ``symbol_index - name_index`` symbol segments are
    prepended to ``name``.

    Example:
        Symbol: foo :: bar :: do_something :: h0123456789abcdef
        Name:          baz :: do_something :: <&u128>
        Result: foo :: baz :: do_something :: <&u128>
                ^^^    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                 |     |
                 |     +- kept from name
                 +- taken from symbol

    Args:
        symbol_name: Mangled symbol name of the item.
        name: Possibly unqualified function name of the item.

    Returns:
        ``name`` itself if its prefix is already longer than the symbol prefix,
        otherwise ``name`` extended with the missing leading symbol segments.

    Raises:
        ValueError: If the demangled symbol has fewer than two segments, if no
            symbol segment before the hash starts with an identifier, or if the
            function name does not occur as a segment of ``name``.
    """
    import re

    def extract_id(s: str) -> str | None:
        """Extract a Rust id prefix from a string."""
        m = re.match(r'^(?P<func>[a-zA-Z_][a-zA-Z0-9_]*)', s)
        return m['func'] if m else None

    symbol = _demangle(symbol_name)
    split_symbol = list(_symbol_segments(symbol))
    split_name = list(_symbol_segments(name))

    # Explicit checks instead of ``assert``: assertions are removed under ``-O``,
    # which would turn malformed input into an opaque TypeError further down.
    if len(split_symbol) < 2:
        raise ValueError(
            f'The symbol name should contain at least two segments, an identifier and a hash: {symbol!r}'
        )

    # Extract the function name from the symbol.
    # It's the last segment with a valid id as prefix that's not the hash.
    symbol_index: int | None = None
    fn_name: str | None = None
    for idx in range(len(split_symbol) - 2, -1, -1):
        fn_name = extract_id(split_symbol[idx])
        if fn_name:
            symbol_index = idx
            break
    if symbol_index is None or fn_name is None:
        raise ValueError(f'No function name found in symbol: {symbol!r}')

    # Find the index of the last segment of the name that equals the function name
    name_index: int | None = None
    for idx in range(len(split_name) - 1, -1, -1):
        if split_name[idx] == fn_name:
            name_index = idx
            break
    if name_index is None:
        raise ValueError(f'Function name {fn_name!r} from symbol {symbol!r} not found in name: {name!r}')

    if symbol_index < name_index:
        # Do not add a prefix if the name prefix is longer than the symbol prefix
        return name

    # Construct the prefix and the result
    prefix = split_symbol[: symbol_index - name_index]
    return '::'.join([*prefix, *split_name])
117+
118+
119+
def _demangle(symbol: str) -> str:
    """Demangle ``symbol`` and normalize generic argument lists into path segments.

    The symbol is demangled with ``rust_demangler.demangle``. Afterwards ``'::'``
    is inserted before every ``'<'`` that is neither at the beginning of the
    string nor directly preceded by ``':'``.
    """
    import re

    from rust_demangler import demangle  # type: ignore [import-untyped]

    demangled = demangle(symbol)
    return re.sub(r'(?<!^)(?<!:)<', r'::<', demangled)
127+
128+
129+
def _symbol_segments(s: str) -> Iterator[str]:
130+
"""Split a symbol at ``'::'`` not between ``'<'`` and ``'>'``."""
131+
it = iter(s)
132+
la = ''
133+
buf: list[str] = []
134+
135+
def consume() -> None:
136+
nonlocal la
137+
la = next(it, '')
138+
139+
depth = 0
140+
consume()
141+
while la:
142+
match la:
143+
case ':':
144+
consume()
145+
match la:
146+
case ':':
147+
consume()
148+
if depth:
149+
buf += [':', ':']
150+
else:
151+
yield ''.join(buf)
152+
buf.clear()
153+
case '':
154+
buf.append(':')
155+
break
156+
case _:
157+
buf += [':', la]
158+
consume()
159+
case '<':
160+
buf.append(la)
161+
consume()
162+
depth += 1
163+
case '>':
164+
buf.append(la)
165+
consume()
166+
depth -= 1
167+
case '':
168+
raise AssertionError('The outer loop should ensure this is unreachable')
169+
case _:
170+
buf.append(la)
171+
consume()
172+
173+
if depth != 0:
174+
raise ValueError(f'Unbalanced <> in symbol: {s}')
175+
yield ''.join(buf)
176+
177+
45178
def apply_offset(info: SMIRInfo, offset: int) -> None:
46179
# mutates the dictionary inside the SMIRInfo
47180
# all fields containing a `Ty` are updated, adding the given offset

kmir/src/tests/integration/data/crate-tests/single-bin/a_module::twice.expected renamed to kmir/src/tests/integration/data/crate-tests/single-bin/single_exe::a_module::twice.expected

File renamed without changes.

kmir/src/tests/integration/data/crate-tests/single-bin/main.expected renamed to kmir/src/tests/integration/data/crate-tests/single-bin/single_exe::main.expected

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
│ (225 steps)
66
├─ 3 (terminal)
77
│ #EndProgram ~> .K
8-
│ function: main
98
109
┊ constraint: true
1110
┊ subst: ...

kmir/src/tests/integration/data/crate-tests/single-dylib/add.expected renamed to kmir/src/tests/integration/data/crate-tests/single-dylib/small_test_dylib::add.expected

File renamed without changes.

kmir/src/tests/integration/data/crate-tests/single-lib/testing::test_add_in_range.expected renamed to kmir/src/tests/integration/data/crate-tests/single-lib/small_test_lib::testing::test_add_in_range.expected

File renamed without changes.

kmir/src/tests/integration/data/crate-tests/two-crate-bin/main.expected renamed to kmir/src/tests/integration/data/crate-tests/two-crate-bin/crate2::main.expected

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
│ (730 steps)
66
├─ 3 (terminal)
77
│ #EndProgram ~> .K
8-
│ function: main
98
109
┊ constraint: true
1110
┊ subst: ...

kmir/src/tests/integration/data/crate-tests/two-crate-dylib/test_crate1_with.expected renamed to kmir/src/tests/integration/data/crate-tests/two-crate-dylib/crate2::test_crate1_with.expected

File renamed without changes.

kmir/src/tests/unit/test_linker.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, NamedTuple
4+
5+
import pytest
6+
7+
if TYPE_CHECKING:
8+
from typing import Final
9+
10+
11+
class _TestData(NamedTuple):
12+
symbol: str
13+
name: str
14+
expected_demangled: str
15+
expected_qualified: str
16+
17+
18+
# Test cases shared by ``test_demangle`` and ``test_mono_item_fn_name``
TEST_DATA: Final[tuple[_TestData, ...]] = (
    # Leading symbol segment is prepended, the rest of the name is kept
    _TestData(
        symbol='_ZN3foo3bar12do_something17h0123456789abcdefE',
        name='baz::do_something::<&u128>',
        expected_demangled='foo::bar::do_something::h0123456789abcdef',
        expected_qualified='foo::baz::do_something::<&u128>',
    ),
    # Bare function names receive the crate name
    _TestData(
        symbol='_ZN14small_test_lib3add17h67a713ff87afe55cE',
        name='add',
        expected_demangled='small_test_lib::add::h67a713ff87afe55c',
        expected_qualified='small_test_lib::add',
    ),
    _TestData(
        symbol='_ZN14small_test_lib6assume17h9c6e78f80476cd7fE',
        name='assume',
        expected_demangled='small_test_lib::assume::h9c6e78f80476cd7f',
        expected_qualified='small_test_lib::assume',
    ),
    # Name with a module prefix receives the crate name
    _TestData(
        symbol='_ZN14small_test_lib7testing17test_add_in_range17h810360fe730dd322E',
        name='testing::test_add_in_range',
        expected_demangled='small_test_lib::testing::test_add_in_range::h810360fe730dd322',
        expected_qualified='small_test_lib::testing::test_add_in_range',
    ),
    # Names that are expected to stay unchanged
    _TestData(
        symbol='_ZN42_$LT$$RF$T$u20$as$u20$core..fmt..Debug$GT$3fmt17h3e83eb3d49c5b5a8E',
        name='<&u128 as std::fmt::Debug>::fmt',
        expected_demangled='<&T as core::fmt::Debug>::fmt::h3e83eb3d49c5b5a8',
        expected_qualified='<&u128 as std::fmt::Debug>::fmt',
    ),
    _TestData(
        symbol='_ZN4core3fmt3num51_$LT$impl$u20$core..fmt..Debug$u20$for$u20$u128$GT$3fmt17hfb9d82c7f008e36bE',
        name='core::fmt::num::<impl std::fmt::Debug for u128>::fmt',
        expected_demangled='core::fmt::num::<impl core::fmt::Debug for u128>::fmt::hfb9d82c7f008e36b',
        expected_qualified='core::fmt::num::<impl std::fmt::Debug for u128>::fmt',
    ),
    _TestData(
        symbol='_ZN4core3ptr29drop_in_place$LT$$RF$u128$GT$17hb88ca2b7d29bccccE',
        name='std::ptr::drop_in_place::<&u128>',
        expected_demangled='core::ptr::drop_in_place::<&u128>::hb88ca2b7d29bcccc',
        expected_qualified='std::ptr::drop_in_place::<&u128>',
    ),
    _TestData(
        symbol='_ZN4core9panicking13assert_failed17hdf64d315df90cf99E',
        name='core::panicking::assert_failed::<u128, u128>',
        expected_demangled='core::panicking::assert_failed::hdf64d315df90cf99',
        expected_qualified='core::panicking::assert_failed::<u128, u128>',
    ),
    _TestData(
        symbol='_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h1f955f4ce95634f1E',
        name='std::rt::lang_start::<()>::{closure#0}',
        expected_demangled='std::rt::lang_start::{{closure}}::h1f955f4ce95634f1',
        expected_qualified='std::rt::lang_start::<()>::{closure#0}',
    ),
    # Vtable shim: the function name is a prefix of the symbol segment
    _TestData(
        symbol='_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17ha8802a5f1fccb12bE',
        name='<{closure@std::rt::lang_start<()>::{closure#0}} as std::ops::FnOnce<()>>::call_once',
        expected_demangled='core::ops::function::FnOnce::call_once{{vtable.shim}}::ha8802a5f1fccb12b',
        expected_qualified='core::ops::function::<{closure@std::rt::lang_start<()>::{closure#0}} as std::ops::FnOnce<()>>::call_once',
    ),
    # Generic arguments containing single colons
    _TestData(
        symbol='_ZN4core6option15Option$LT$T$GT$3map17h5b7d515e7f40e720E',
        name='std::option::Option::<isize>::map::<crate1:MMyEnum, {closure@src/main.rs:19:29: 19:32}>',
        expected_demangled='core::option::Option::<T>::map::h5b7d515e7f40e720',
        expected_qualified='std::option::Option::<isize>::map::<crate1:MMyEnum, {closure@src/main.rs:19:29: 19:32}>',
    ),
)
86+
87+
88+
@pytest.mark.parametrize(
    'symbol,_name,expected,_qualified',
    TEST_DATA,
    ids=[case.symbol for case in TEST_DATA],
)
def test_demangle(
    symbol: str,
    _name: str,
    expected: str,
    _qualified: str,
) -> None:
    """Check that ``_demangle`` produces the expected demangled symbol."""
    from kmir.linker import _demangle

    # When
    demangled = _demangle(symbol=symbol)

    # Then
    assert expected == demangled
106+
107+
108+
@pytest.mark.parametrize(
    'symbol_name,name,_demangled,expected',
    TEST_DATA,
    ids=[case.symbol for case in TEST_DATA],
)
def test_mono_item_fn_name(
    symbol_name: str,
    name: str,
    _demangled: str,
    expected: str,
) -> None:
    """Check that ``_mono_item_fn_name`` produces the expected qualified name."""
    from kmir.linker import _mono_item_fn_name

    # When
    qualified = _mono_item_fn_name(symbol_name=symbol_name, name=name)

    # Then
    assert expected == qualified

0 commit comments

Comments
 (0)