Skip to content

Commit 7c50f5c

Browse files
jbertholdtothtamas28Stevengrejuliankuners
authored
Updates from master (#763)
* Python code refactoring * decoding `struct`s * handle functions with empty body * Fix for statistics (several paths) * nix overlay fix --------- Co-authored-by: Tamás Tóth <tothtamas28@users.noreply.github.com> Co-authored-by: JianHong Zhao <zhaojianhong96@gmail.com> Co-authored-by: Julian Kuners <julian.kuners@gmail.com>
1 parent 3cadd11 commit 7c50f5c

16 files changed

Lines changed: 492 additions & 87 deletions

CLAUDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ MIR Semantics provides a K Framework-based formal semantics for Rust's Stable MI
1313
# Initial setup - install stable-mir-json tool for SMIR generation
1414
make stable-mir-json
1515

16-
# Build K semantics definitions (required after any K file changes)
16+
# Build K semantics definitions (required after K file changes)
1717
make build
1818

1919
# Full build and check

flake.nix

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,15 +73,22 @@
7373
final: prev:
7474
let
7575
rev = self.rev or null;
76+
kmir-pyk-pyproject = final.callPackage ./nix/kmir-pyk-pyproject { inherit uv2nix; };
7677
kmir-pyk = final.callPackage ./nix/kmir-pyk {
77-
inherit pyproject-nix pyproject-build-systems uv2nix;
78+
inherit pyproject-nix pyproject-build-systems kmir-pyk-pyproject;
7879
python = final."python${pythonVer}";
80+
pyproject-overlays = [ (k-framework.overlays.pyk-pyproject system) ];
7981
};
8082
kmir = final.callPackage ./nix/kmir { inherit kmir-pyk rev; };
8183
kmir-package-test = final.callPackage ./nix/test/package.nix { inherit rev; };
8284
in
8385
{
84-
inherit kmir-pyk kmir kmir-package-test;
86+
inherit
87+
kmir-pyk
88+
kmir
89+
kmir-package-test
90+
kmir-pyk-pyproject
91+
;
8592
};
8693
pkgs = import nixpkgs {
8794
inherit system;
@@ -137,5 +144,12 @@
137144
)
138145
// {
139146
overlays.default = final: prev: { inherit (self.packages.${final.system}) kmir; };
147+
pyprojectOverlays = {
148+
# this pyproject-nix overlay allows for overriding the python packages that are otherwise locked in `uv.lock`
149+
# by using this overlay in dependant nix flakes, you ensure that nix overrides also override the python package
150+
pyk-pyproject = system: final: prev: {
151+
inherit (self.packages.${system}.kmir-pyk-pyproject.lockFileOverlay final prev) kmir-pyk;
152+
};
153+
};
140154
};
141155
}

kmir/src/kmir/decoding.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pyk.kast.inner import KApply
77
from pyk.kast.prelude.string import stringToken
88

9-
from .alloc import Allocation, AllocInfo, Memory, ProvenanceEntry, ProvenanceMap
9+
from .alloc import Allocation, AllocInfo, Function, Memory, ProvenanceEntry, ProvenanceMap, Static, VTable
1010
from .ty import (
1111
ArbitraryFields,
1212
ArrayT,
@@ -21,6 +21,7 @@
2121
RefT,
2222
Single,
2323
StrT,
24+
StructT,
2425
UintT,
2526
)
2627
from .value import (
@@ -71,6 +72,28 @@ def decode_alloc_or_unable(alloc_info: AllocInfo, types: Mapping[Ty, TypeMetadat
7172
):
7273
data = bytes(n or 0 for n in bytez)
7374
return _decode_memory_alloc_or_unable(data=data, ptrs=ptrs, ty=ty, types=types)
75+
case AllocInfo(
76+
ty=_,
77+
# `Static` currently only carries `def_id`; we ignore it here.
78+
global_alloc=Static(),
79+
):
80+
# Static global alloc does not carry raw bytes here; leave as unable-to-decode placeholder
81+
return UnableToDecodeValue('Static global allocation not decoded')
82+
case AllocInfo(
83+
ty=_,
84+
global_alloc=Function(
85+
instance=_,
86+
),
87+
):
88+
# Function alloc currently not decoded to a runtime value
89+
return UnableToDecodeValue('Function global allocation not decoded')
90+
case AllocInfo(
91+
ty=_,
92+
# `VTable` carries `ty` and optional `binder`; we ignore both here.
93+
global_alloc=VTable(),
94+
):
95+
# VTable alloc currently not decoded to a runtime value
96+
return UnableToDecodeValue('VTable global allocation not decoded')
7497
case _:
7598
raise AssertionError('Unhandled case')
7699

@@ -149,6 +172,8 @@ def decode_value(data: bytes, type_info: TypeMetadata, types: Mapping[Ty, TypeMe
149172
return _decode_int(data, int_ty)
150173
case ArrayT(elem_ty, length):
151174
return _decode_array(data, elem_ty, length, types)
175+
case StructT(fields=fields, layout=layout):
176+
return _decode_struct(data=data, fields=fields, layout=layout, types=types)
152177
case EnumT(
153178
discriminants=discriminants,
154179
fields=fields,
@@ -219,6 +244,28 @@ def _decode_array(
219244
return RangeValue(elems)
220245

221246

247+
def _decode_struct(
248+
*,
249+
data: bytes,
250+
fields: list[Ty],
251+
layout: LayoutShape | None,
252+
types: Mapping[Ty, TypeMetadata],
253+
) -> Value:
254+
if not layout:
255+
raise ValueError('Struct layout not provided')
256+
257+
offsets = _extract_offsets(layout.fields)
258+
259+
match layout.variants:
260+
case Single(index=0):
261+
pass
262+
case _:
263+
raise ValueError(f'Unexpected layout variants in struct: {layout.variants}')
264+
265+
field_values = _decode_fields(data=data, tys=fields, offsets=offsets, types=types)
266+
return AggregateValue(0, field_values)
267+
268+
222269
def _decode_enum(
223270
*,
224271
data: bytes,

kmir/src/kmir/kmir.py

Lines changed: 54 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -87,45 +87,61 @@ def kcfg_explore(self, label: str | None = None) -> Iterator[KCFGExplore]:
8787
) as cts:
8888
yield KCFGExplore(cts, kcfg_semantics=KMIRSemantics())
8989

90-
def make_call_config(
90+
def _make_concrete_call_config(self, smir_info: SMIRInfo, *, start_symbol: str = 'main') -> KInner:
91+
def init_subst() -> dict[str, KInner]:
92+
init_config = self.definition.init_config(KSort('GeneratedTopCell'))
93+
_, res = split_config_from(init_config)
94+
return res
95+
96+
if not start_symbol in smir_info.function_tys:
97+
raise KeyError(f'{start_symbol} not found in program')
98+
99+
args_info = smir_info.function_arguments[start_symbol]
100+
if args_info:
101+
raise ValueError(
102+
f'Cannot create concrete call configuration for {start_symbol}: function has parameters: {args_info}'
103+
)
104+
105+
# The configuration is the default initial configuration, with the K cell updated with the call terminator
106+
# TODO: see if this can be expressed in more simple terms
107+
subst = Subst(
108+
{
109+
**init_subst(),
110+
**{
111+
'K_CELL': mk_call_terminator(smir_info.function_tys[start_symbol], len(args_info)),
112+
},
113+
}
114+
)
115+
empty_config = self.definition.empty_config(KSort('GeneratedTopCell'))
116+
config = subst(empty_config)
117+
assert not free_vars(config), f'Config by construction should not have any free variables: {config}'
118+
return config
119+
120+
def _make_symbolic_call_config(
91121
self,
92122
smir_info: SMIRInfo,
93123
*,
94124
start_symbol: str = 'main',
95-
sort: str = 'GeneratedTopCell',
96-
init: bool = False,
97125
) -> tuple[KInner, list[KInner]]:
98126
if not start_symbol in smir_info.function_tys:
99127
raise KeyError(f'{start_symbol} not found in program')
100128

101129
args_info = smir_info.function_arguments[start_symbol]
102130
locals, constraints = symbolic_locals(smir_info, args_info)
103-
104-
_subst = {
105-
'K_CELL': mk_call_terminator(smir_info.function_tys[start_symbol], len(args_info)),
106-
'STACK_CELL': list_empty(), # FIXME see #560, problems matching a symbolic stack
107-
'LOCALS_CELL': list_of(locals),
108-
}
109-
110-
_init_subst: dict[str, KInner] = {}
111-
if init:
112-
_subst['LOCALS_CELL'] = list_empty()
113-
init_config = self.definition.init_config(KSort(sort))
114-
_, _init_subst = split_config_from(init_config)
115-
116-
for key in _init_subst:
117-
if key not in _subst:
118-
_subst[key] = _init_subst[key]
119-
120-
subst = Subst(_subst)
121-
config = self.definition.empty_config(KSort(sort))
122-
return (subst.apply(config), constraints)
131+
subst = Subst(
132+
{
133+
'K_CELL': mk_call_terminator(smir_info.function_tys[start_symbol], len(args_info)),
134+
'STACK_CELL': list_empty(), # FIXME see #560, problems matching a symbolic stack
135+
'LOCALS_CELL': list_of(locals),
136+
},
137+
)
138+
empty_config = self.definition.empty_config(KSort('GeneratedTopCell'))
139+
config = subst(empty_config)
140+
return config, constraints
123141

124142
def run_smir(self, smir_info: SMIRInfo, start_symbol: str = 'main', depth: int | None = None) -> Pattern:
125143
smir_info = smir_info.reduce_to(start_symbol)
126-
init_config, init_constraints = self.make_call_config(smir_info, start_symbol=start_symbol, init=True)
127-
if len(free_vars(init_config)) > 0 or len(init_constraints) > 0:
128-
raise ValueError(f'Cannot run function with variables: {start_symbol} - {free_vars(init_config)}')
144+
init_config = self._make_concrete_call_config(smir_info, start_symbol=start_symbol)
129145
init_kore = self.kast_to_kore(init_config, KSort('GeneratedTopCell'))
130146
result = self.run_pattern(init_kore, depth=depth)
131147
return result
@@ -135,10 +151,9 @@ def apr_proof_from_smir(
135151
id: str,
136152
smir_info: SMIRInfo,
137153
start_symbol: str = 'main',
138-
sort: str = 'GeneratedTopCell',
139154
proof_dir: Path | None = None,
140155
) -> APRProof:
141-
lhs_config, constraints = self.make_call_config(smir_info, start_symbol=start_symbol, sort=sort)
156+
lhs_config, constraints = self._make_symbolic_call_config(smir_info, start_symbol=start_symbol)
142157
lhs = CTerm(lhs_config, constraints)
143158

144159
var_config, var_subst = split_config_from(lhs_config)
@@ -181,7 +196,18 @@ def prove_rs(opts: ProveRSOpts) -> APRProof:
181196
smir_info = SMIRInfo(cargo_get_smir_json(opts.rs_file, save_smir=opts.save_smir))
182197

183198
smir_info = smir_info.reduce_to(opts.start_symbol)
199+
# Report whether the reduced call graph includes any functions without MIR bodies
200+
missing_body_syms = [
201+
sym
202+
for sym, item in smir_info.items.items()
203+
if 'MonoItemFn' in item['mono_item_kind']
204+
and item['mono_item_kind']['MonoItemFn'].get('body') is None
205+
]
206+
has_missing = len(missing_body_syms) > 0
184207
_LOGGER.info(f'Reduced items table size {len(smir_info.items)}')
208+
if has_missing:
209+
_LOGGER.info(f'missing-bodies-present={has_missing} count={len(missing_body_syms)}')
210+
_LOGGER.debug(f'Missing-body function symbols (first 5): {missing_body_syms[:5]}')
185211

186212
kmir = KMIR.from_kompiled_kore(
187213
smir_info, symbolic=True, bug_report=opts.bug_report, target_dir=str(target_path)

kmir/src/kmir/linker.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,14 @@ def apply_offset(info: SMIRInfo, offset: int) -> None:
208208
match global_alloc:
209209
case {'Memory': allocation}: # global_alloc: Memory, allocation: Allocation
210210
apply_offset_provenance(allocation['provenance'], offset)
211+
case {'Static': alloc_id}: # global_alloc: Static
212+
alloc['global_alloc']['Static'] = alloc_id + offset
213+
case {'Function': _}: # global_alloc: Function
214+
# Quick-compat: leave function instance as-is; not used for offset computations
215+
pass
216+
case {'VTable': _}: # global_alloc: VTable
217+
# Quick-compat: keep as-is (if present). We do not offset embedded types here.
218+
pass
211219
case _:
212220
raise ValueError('Unsupported or invalid GlobalAlloc data: {global_alloc}')
213221

@@ -249,17 +257,20 @@ def apply_offset_item(item: dict, offset: int) -> None:
249257
# * traverses function locals and debug information to add `offset` to all `span` fields
250258
if 'MonoItemFn' in item and 'body' in item['MonoItemFn']:
251259
body = item['MonoItemFn']['body']
252-
for local in body['locals']:
260+
if body is None:
261+
_LOGGER.warning(f"MonoItemFn {item['MonoItemFn'].get('name', '<unknown>')!r} has no body; skipping offsets")
262+
return
263+
for local in body.get('locals', []):
253264
local['ty'] += offset
254265
local['span'] += offset
255-
for block in body['blocks']:
266+
for block in body.get('blocks', []):
256267
for stmt in block['statements']:
257268
apply_offset_stmt(stmt['kind'], offset)
258269
stmt['span'] += offset
259270
apply_offset_terminator(block['terminator']['kind'], offset)
260271
block['terminator']['span'] += offset
261272
# adjust span in var_debug_info, each item's source_info.span
262-
for thing in body['var_debug_info']:
273+
for thing in body.get('var_debug_info', []):
263274
thing['source_info']['span'] += offset
264275
if 'Constant' in thing['value']:
265276
apply_offset_operand({'Constant': thing['value']}, offset)

kmir/src/kmir/smir.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,15 @@ def function_arguments(self) -> dict[str, list[dict]]:
104104

105105
mono_item_fn = item['mono_item_kind']['MonoItemFn']
106106
name = mono_item_fn['name']
107-
arg_count = mono_item_fn['body']['arg_count']
108-
local_args = mono_item_fn['body']['locals'][1 : arg_count + 1]
107+
body = mono_item_fn.get('body')
108+
if body is None:
109+
# Functions without a MIR body (e.g., externs/const shims) have no arguments to inspect.
110+
# Skip adding entries for them; callers should not rely on args for such symbols.
111+
_LOGGER.debug(f'Skipping function_arguments for {name}: missing body')
112+
continue
113+
114+
arg_count = body['arg_count']
115+
local_args = body['locals'][1 : arg_count + 1]
109116
res[name] = local_args
110117
return res
111118

@@ -195,12 +202,16 @@ def call_edges(self) -> dict[Ty, set[Ty]]:
195202
# skip functions not present in the `function_symbols_reverse` table
196203
if not sym in self.function_symbols_reverse:
197204
continue
198-
called_funs = [
199-
b['terminator']['kind']['Call']['func']
200-
for b in item['mono_item_kind']['MonoItemFn']['body']['blocks']
201-
if 'Call' in b['terminator']['kind']
202-
]
203-
called_tys = {Ty(op['Constant']['const_']['ty']) for op in called_funs if 'Constant' in op}
205+
body = item['mono_item_kind']['MonoItemFn'].get('body')
206+
if body is None or 'blocks' not in body:
207+
# No MIR body means we cannot extract call edges; treat as having no outgoing edges.
208+
_LOGGER.debug(f'Skipping call edge extraction for {sym}: missing body')
209+
called_tys: set[Ty] = set()
210+
else:
211+
called_funs = [
212+
b['terminator']['kind']['Call']['func'] for b in body['blocks'] if 'Call' in b['terminator']['kind']
213+
]
214+
called_tys = {Ty(op['Constant']['const_']['ty']) for op in called_funs if 'Constant' in op}
204215
# TODO also add any constant operands used as arguments whose ty refer to a function
205216
# the semantics currently does not support this, see issue #488 and stable-mir-json issue #55
206217
for ty in self.function_symbols_reverse[sym]:

kmir/src/kmir/ty.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def from_raw(data: Any) -> EnumT:
146146
adt_def=adt_def,
147147
discriminants=list(discriminants),
148148
fields=[list(tys) for tys in fields],
149-
layout=LayoutShape.from_raw(layout),
149+
layout=LayoutShape.from_raw(layout) if layout is not None else None,
150150
)
151151
case _:
152152
raise _cannot_parse_as('EnumT', data)
@@ -467,6 +467,7 @@ class StructT(TypeMetadata):
467467
name: str
468468
adt_def: int
469469
fields: list[Ty]
470+
layout: LayoutShape | None
470471

471472
@staticmethod
472473
def from_raw(data: Any) -> StructT:
@@ -476,16 +477,25 @@ def from_raw(data: Any) -> StructT:
476477
'name': name,
477478
'adt_def': adt_def,
478479
'fields': fields,
480+
'layout': layout,
479481
}
480482
}:
481483
return StructT(
482484
name=name,
483485
adt_def=adt_def,
484486
fields=list(fields),
487+
layout=LayoutShape.from_raw(layout) if layout is not None else None,
485488
)
486489
case _:
487490
raise _cannot_parse_as('StructT', data)
488491

492+
def nbytes(self, types: Mapping[Ty, TypeMetadata]) -> int:
493+
match self.layout:
494+
case None:
495+
raise ValueError(f'Cannot determine size, layout is missing for: {self}')
496+
case LayoutShape(size=size):
497+
return size.in_bytes
498+
489499

490500
@dataclass
491501
class UnionT(TypeMetadata):

0 commit comments

Comments
 (0)