Skip to content

Commit 8fba85a

Browse files
authored
Remove alloc decoding from the K definition (#729)
1 parent 71bc6f9 commit 8fba85a

25 files changed

Lines changed: 120 additions & 292 deletions

kmir/src/kmir/__main__.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from .build import HASKELL_DEF_DIR, LLVM_DEF_DIR, LLVM_LIB_DIR
1818
from .cargo import CargoProject
19-
from .kmir import KMIR, DecodeMode, KMIRAPRNodePrinter
19+
from .kmir import KMIR, KMIRAPRNodePrinter
2020
from .linker import link
2121
from .options import (
2222
GenSpecOpts,
@@ -352,13 +352,6 @@ def _arg_parser() -> ArgumentParser:
352352
prove_rs_parser.add_argument(
353353
'--start-symbol', type=str, metavar='SYMBOL', default='main', help='Symbol name to begin execution from'
354354
)
355-
prove_rs_parser.add_argument(
356-
'--decode-mode',
357-
type=DecodeMode,
358-
metavar='DECODE_MODE',
359-
default=DecodeMode.NONE,
360-
help='Allocation decoding mode: NONE (default), PARTIAL, or FULL',
361-
)
362355

363356
link_parser = command_parser.add_parser(
364357
'link', help='Link together 2 or more SMIR JSON files', parents=[kcli_args.logging_args]
@@ -435,7 +428,6 @@ def _parse_args(ns: Namespace) -> KMirOpts:
435428
save_smir=ns.save_smir,
436429
smir=ns.smir,
437430
start_symbol=ns.start_symbol,
438-
decode_mode=ns.decode_mode,
439431
)
440432
case 'link':
441433
return LinkOpts(

kmir/src/kmir/alloc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def from_dict(dct: dict[str, Any]) -> ProvenanceMap:
7979
ptrs=[
8080
ProvenanceEntry(
8181
offset=int(size),
82-
prov=AllocId(prov),
82+
alloc_id=AllocId(prov),
8383
)
8484
for size, prov in dct['ptrs']
8585
],
@@ -88,4 +88,4 @@ def from_dict(dct: dict[str, Any]) -> ProvenanceMap:
8888

8989
class ProvenanceEntry(NamedTuple):
9090
offset: int
91-
prov: AllocId
91+
alloc_id: AllocId

kmir/src/kmir/decoding.py

Lines changed: 46 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
from typing import TYPE_CHECKING
55

66
from pyk.kast.inner import KApply
7-
from pyk.kast.prelude.bytes import bytesToken
8-
from pyk.kast.prelude.kint import intToken
7+
from pyk.kast.prelude.string import stringToken
98

109
from .alloc import Allocation, AllocInfo, Memory, ProvenanceEntry, ProvenanceMap
1110
from .ty import ArrayT, Bool, EnumT, Int, IntTy, PtrT, RefT, Uint
@@ -32,28 +31,12 @@
3231

3332
@dataclass
3433
class UnableToDecodeValue(Value):
35-
data: bytes
36-
type_info: TypeMetadata
34+
msg: str
3735

3836
def to_kast(self) -> KInner:
3937
return KApply(
40-
'Evaluation::UnableToDecodeValue',
41-
bytesToken(self.data),
42-
KApply('TypeInfo::VoidType'), # TODO: TypeInfo -> KAST transformation
43-
)
44-
45-
46-
@dataclass
47-
class UnableToDecodeAlloc(Value):
48-
data: bytes
49-
ty: Ty
50-
51-
def to_kast(self) -> KInner:
52-
return KApply(
53-
'Evaluation::UnableToDecodeAlloc',
54-
bytesToken(self.data),
55-
KApply('ty', intToken(self.ty)),
56-
KApply('ProvenanceMapEntries::empty'), # TODO
38+
'Evaluation::UnableToDecodePy',
39+
stringToken(self.msg),
5740
)
5841

5942

@@ -71,34 +54,50 @@ def decode_alloc_or_unable(alloc_info: AllocInfo, types: Mapping[Ty, TypeMetadat
7154
),
7255
):
7356
data = bytes(n or 0 for n in bytez)
74-
type_info = types[ty]
75-
76-
match ptrs:
77-
case []:
78-
return decode_value_or_unable(data=data, type_info=type_info, types=types)
79-
80-
case [ProvenanceEntry(0, alloc_id)]:
81-
if (pointee_ty := _pointee_ty(type_info)) is not None: # ensures this is a reference type
82-
pointee_type_info = types[pointee_ty]
83-
metadata = _metadata(pointee_type_info)
84-
85-
if len(data) == 8:
86-
# single slim pointer (assumes usize == u64)
87-
return AllocRefValue(alloc_id=alloc_id, metadata=metadata)
88-
89-
if len(data) == 16 and metadata == DynamicSize(1):
90-
# sufficient data to decode dynamic size (assumes usize == u64)
91-
# expect fat pointer
92-
return AllocRefValue(
93-
alloc_id=alloc_id,
94-
metadata=DynamicSize(int.from_bytes(data[8:16], byteorder='little', signed=False)),
95-
)
96-
97-
return UnableToDecodeAlloc(data=data, ty=ty)
57+
return _decode_memory_alloc_or_unable(data=data, ptrs=ptrs, ty=ty, types=types)
9858
case _:
9959
raise AssertionError('Unhandled case')
10060

10161

62+
def _decode_memory_alloc_or_unable(
63+
data: bytes,
64+
ptrs: list[ProvenanceEntry],
65+
ty: Ty,
66+
types: Mapping[Ty, TypeMetadata],
67+
) -> Value:
68+
try:
69+
type_info = types[ty]
70+
except KeyError:
71+
return UnableToDecodeValue(f'Unknown type: {ty}')
72+
73+
match ptrs:
74+
case []:
75+
return decode_value_or_unable(data=data, type_info=type_info, types=types)
76+
77+
case [ProvenanceEntry(0, alloc_id)]:
78+
if (pointee_ty := _pointee_ty(type_info)) is not None: # ensures this is a reference type
79+
try:
80+
pointee_type_info = types[pointee_ty]
81+
except KeyError:
82+
return UnableToDecodeValue(f'Unknown pointee type: {pointee_ty}')
83+
84+
metadata = _metadata(pointee_type_info)
85+
86+
if len(data) == 8:
87+
# single slim pointer (assumes usize == u64)
88+
return AllocRefValue(alloc_id=alloc_id, metadata=metadata)
89+
90+
if len(data) == 16 and metadata == DynamicSize(1):
91+
# sufficient data to decode dynamic size (assumes usize == u64)
92+
# expect fat pointer
93+
return AllocRefValue(
94+
alloc_id=alloc_id,
95+
metadata=DynamicSize(int.from_bytes(data[8:16], byteorder='little', signed=False)),
96+
)
97+
98+
return UnableToDecodeValue(f'Unable to decode alloc: {data!r}, of type: {type_info}')
99+
100+
102101
def _pointee_ty(type_info: TypeMetadata) -> Ty | None:
103102
match type_info:
104103
case PtrT(ty) | RefT(ty):
@@ -120,8 +119,8 @@ def _metadata(type_info: TypeMetadata) -> Metadata:
120119
def decode_value_or_unable(data: bytes, type_info: TypeMetadata, types: Mapping[Ty, TypeMetadata]) -> Value:
121120
try:
122121
return decode_value(data=data, type_info=type_info, types=types)
123-
except ValueError:
124-
return UnableToDecodeValue(data=data, type_info=type_info)
122+
except ValueError as err:
123+
return UnableToDecodeValue(f'Unable to decode value: {data!r}, of type: {type_info}: {err}')
125124

126125

127126
def decode_value(data: bytes, type_info: TypeMetadata, types: Mapping[Ty, TypeMetadata]) -> Value:

kmir/src/kmir/kdist/mir-semantics/rt/decoding.md

Lines changed: 4 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,12 @@ from raw bytes to typed `Value` objects according to Rust's memory layout rules.
1414
requires "../ty.md"
1515
requires "value.md"
1616
requires "numbers.md"
17-
requires "../alloc.md"
1817
1918
module RT-DECODING
2019
imports BOOL
2120
imports MAP
2221
2322
imports TYPES
24-
imports ALLOC
2523
imports RT-VALUE-SYNTAX
2624
imports RT-NUMBERS
2725
imports RT-TYPES
@@ -37,9 +35,9 @@ and arrays (where layout is trivial).
3735
### Decoding `PrimitiveType`s
3836

3937
```k
40-
syntax Evaluation ::= #decodeValue ( Bytes , TypeInfo , Map ) [function, total, symbol(decodeValue)]
41-
| UnableToDecodeValue( Bytes , TypeInfo ) [symbol(Evaluation::UnableToDecodeValue)]
42-
| UnableToDecodeAlloc( Bytes , Ty , ProvenanceMapEntries ) [symbol(Evaluation::UnableToDecodeAlloc)]
38+
syntax Evaluation ::= #decodeValue ( Bytes , TypeInfo , Map ) [function, total, symbol(decodeValue)]
39+
| UnableToDecode ( Bytes , TypeInfo ) [symbol(Evaluation::UnableToDecode)]
40+
| UnableToDecodePy ( msg: String ) [symbol(Evaluation::UnableToDecodePy)]
4341
4442
// Boolean: should be one byte with value one or zero
4543
rule #decodeValue(BYTES, typeInfoPrimitiveType(primTypeBool), _TYPEMAP) => BoolVal(false)
@@ -83,7 +81,7 @@ rule #decodeValue(BYTES, typeInfoArrayType(ELEMTY, noTyConst), TYPEMAP)
8381
All unimplemented cases will become thunks by way of this default rule:
8482

8583
```k
86-
rule #decodeValue(BYTES, TYPEINFO, _TYPEMAP) => UnableToDecodeValue(BYTES, TYPEINFO) [owise]
84+
rule #decodeValue(BYTES, TYPEINFO, _TYPEMAP) => UnableToDecode(BYTES, TYPEINFO) [owise]
8785
```
8886

8987
## Helper function to determine the expected byte length for a type
@@ -283,100 +281,6 @@ by the element size, then uses the same element-by-element decoding approach as
283281
[preserves-definedness]
284282
```
285283

286-
# Decoding the allocated constants into a memory map
287-
288-
When the program starts, all allocated constants are stored in the `<memory>` cell in the configuration,
289-
as a mapping of `AllocId -> Value`, decoding the allocations to values using the above functions.
290-
291-
Besides actual byte allocations, we often find allocations with provenance,
292-
as slim or fat pointers pointing to other static allocations.
293-
One example of this is allocated error strings, whose length is stored in a fat pointer that is itself an allocated constant.
294-
295-
296-
The basic decoding function is very similar to `#decodeConstant` but returns its result as a singleton `Map` instead of a mere value.
297-
298-
```k
299-
syntax Map ::= #decodeAlloc ( AllocId , Ty , Allocation , Map ) [function] // returns AllocId |-> Value
300-
// ----------------------------------------------------------
301-
rule #decodeAlloc(ID, TY, allocation(BYTES, provenanceMap(.ProvenanceMapEntries), _ALIGN, _MUT), TYPEMAP)
302-
=> ID |-> #decodeValue(BYTES, {TYPEMAP[TY]}:>TypeInfo, TYPEMAP)
303-
requires TY in_keys(TYPEMAP)
304-
andBool isTypeInfo(TYPEMAP[TY])
305-
```
306-
307-
If there are entries in the provenance map, then the decoder must create references to other allocations.
308-
Each entry replaces a pointer starting at a certain offset in the bytes.
309-
The pointee of the respective pointer is determined by type _layout_ of the type to decode.
310-
311-
Simple cases of offset 0 and reference types can be implemented here without consideration of layout.
312-
The reference metadata is either determined statically by type, or filled in from the bytes for fat pointers.
313-
314-
```k
315-
// if length(BYTES) ==Int 16 decode BYTES[9..16] as dynamic size.
316-
rule #decodeAlloc(
317-
ID,
318-
TY,
319-
allocation(BYTES, provenanceMap(provenanceMapEntry(0, REF_ID) ), _ALIGN, _MUT),
320-
TYPEMAP
321-
)
322-
=> ID |-> AllocRef(REF_ID, .ProjectionElems, dynamicSize(Bytes2Int(substrBytes(BYTES, 8, 16), LE, Unsigned)))
323-
requires TY in_keys(TYPEMAP)
324-
andBool isTypeInfo(TYPEMAP[TY])
325-
andBool isTy(pointeeTy({TYPEMAP[TY]}:>TypeInfo)) // ensures this is a reference type
326-
andBool lengthBytes(BYTES) ==Int 16 // sufficient data to decode dynamic size (assumes usize == u64)
327-
andBool dynamicSize(1) ==K #metadata(pointeeTy({TYPEMAP[TY]}:>TypeInfo), TYPEMAP) // expect fat pointer
328-
[preserves-definedness] // valid type lookup ensured
329-
330-
// Otherwise query type information for static size and insert it.
331-
rule #decodeAlloc(
332-
ID,
333-
TY,
334-
allocation(BYTES, provenanceMap(provenanceMapEntry(0, REF_ID) ), _ALIGN, _MUT),
335-
TYPEMAP
336-
)
337-
=> ID |-> AllocRef(REF_ID, .ProjectionElems, #metadata(pointeeTy({TYPEMAP[TY]}:>TypeInfo), TYPEMAP))
338-
requires TY in_keys(TYPEMAP)
339-
andBool isTypeInfo(TYPEMAP[TY])
340-
andBool isTy(pointeeTy({TYPEMAP[TY]}:>TypeInfo)) // ensures this is a reference type
341-
andBool lengthBytes(BYTES) ==Int 8 // single slim pointer (assumes usize == u64)
342-
[priority(60), preserves-definedness] // valid type lookup ensured
343-
```
344-
345-
Allocations with more than one provenance map entry or witn non-reference types remain undecoded.
346-
347-
```k
348-
rule #decodeAlloc(ID, TY, allocation(BYTES, provenanceMap(ENTRIES), _ALIGN, _MUT), _TYPEMAP)
349-
=> ID |-> UnableToDecodeAlloc(BYTES, TY, ENTRIES)
350-
[owise]
351-
```
352-
353-
The entire set of `GlobalAllocs` is decoded by iterating over the list.
354-
It is assumed that the given `Ty -> TypeInfo` map contains all types required.
355-
356-
```k
357-
syntax Map ::= #decodeAllocs ( GlobalAllocs , Map ) [function, total, symbol("decodeAllocs") ] // AllocId |-> Thing
358-
| #decodeAllocsAux ( Map , GlobalAllocs , Map ) [function, total, symbol("decodeAllocsAux")] // accumulating version
359-
// -----------------------------------------------------------------------------------------------
360-
rule #decodeAllocs(ALLOCS, TYPEMAP) => #decodeAllocsAux(.Map, ALLOCS, TYPEMAP)
361-
362-
rule #decodeAllocsAux(ACCUM, .GlobalAllocs, _TYPEMAP) => ACCUM
363-
rule #decodeAllocsAux(ACCUM, globalAllocEntry(ID, TY, Memory(ALLOC)) ALLOCS, TYPEMAP)
364-
=> #decodeAllocsAux(ACCUM #decodeAlloc(ID, TY, ALLOC, TYPEMAP), ALLOCS, TYPEMAP)
365-
requires TY in_keys(TYPEMAP)
366-
[preserves-definedness]
367-
```
368-
369-
If the type of an alloc cannot be found, or for non-`Memory` allocs, the raw data is stored in a super-sort of `Value`.
370-
This ensures that the function is total (anyway lookups require constraining the sort).
371-
372-
```k
373-
syntax AllocData ::= Value | AllocData ( Ty , GlobalAllocKind )
374-
375-
rule #decodeAllocsAux(ACCUM, globalAllocEntry(ID, TY, OTHER) ALLOCS, TYPEMAP)
376-
=> #decodeAllocsAux(ACCUM ID |-> AllocData(TY, OTHER), ALLOCS, TYPEMAP)
377-
[owise]
378-
```
379-
380284
```k
381285
endmodule
382286
```

0 commit comments

Comments
 (0)