From f8f55a58bc7e09f41ab4d35eb75988a0b23d3eb0 Mon Sep 17 00:00:00 2001 From: Jost Berthold Date: Wed, 3 Dec 2025 16:56:14 +1100 Subject: [PATCH 1/2] Stratify lookup functions to speed up llvm-kompile-matching (#875) Instead of equations for a single function, `K` helper functions are generated and the equations split (more or less?) evenly among them (by modulo of the Int-valued IDs). ``` syntax TypeInfo ::= lookupType0(Int) [function, total] syntax TypeInfo ::= lookupType1(Int) [function, total] ... syntax Typeinfo ::= lookupType(Int) [function, total] rule lookupType(ty(N)) => lookupType0(N) requires N %Int K ==Int 0 rule lookupType(ty(N)) => lookupType1(N) requires N %Int K ==Int 1 ... rule lookupType(ty(N)) => lookupType(N) requires N %Int K ==Int ``` Each of the helpers gets its own default, all are total. ``` rule lookupType0(_) => TypeInfoVoidType [owise] // (i.e., not found) rule lookupType1(_) => TypeInfoVoidType [owise] // (i.e., not found) ... ``` For a given `TypeMapping(ty('X), 'TYPEINFO)`, the `X` is divided by `K` and the equation is generated in the respective helper function `m = 'X %Int K` ``` rule lookupType'('X) => 'TYPEINFO ``` This stratification is applied for the `lookupAlloc` and `lookupTy` functions. --- kmir/src/kmir/kdist/mir-semantics/rt/value.md | 16 +- kmir/src/kmir/kompile.py | 164 ++++++++++++++---- 2 files changed, 138 insertions(+), 42 deletions(-) diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/value.md b/kmir/src/kmir/kdist/mir-semantics/rt/value.md index 5dd293d41..23accb633 100644 --- a/kmir/src/kmir/kdist/mir-semantics/rt/value.md +++ b/kmir/src/kmir/kdist/mir-semantics/rt/value.md @@ -136,26 +136,22 @@ The basic operations of reading and writing those values can use K's "heating" a These functions are global static data accessed from many places, and will be extended for the particular program. -**TODO find a better home for these definitions.** ```k // // function store, Ty -> MonoItemFn syntax MonoItemKind ::= lookupFunction ( Ty ) [function, total, symbol(lookupFunction)] - // ------------------------------------------------------------ - rule lookupFunction(ty(TY)) => monoItemFn(symbol("** UNKNOWN FUNCTION **"), defId(TY), noBody) [owise] // HACK - // cannot be total without a default "error" element. `Ty` is key for both functions and data. // // static allocations: AllocId -> AllocData (Value or error) syntax Evaluation ::= lookupAlloc ( AllocId ) [function, total, symbol(lookupAlloc)] - // ----------------------------------------------------------- - rule lookupAlloc(ID) => InvalidAlloc(ID) [owise] - - syntax Evaluation ::= InvalidAlloc ( AllocId ) + | InvalidAlloc ( AllocId ) // error marker // // static information about the base type interning in the MIR: Ty -> TypeInfo syntax TypeInfo ::= lookupTy ( Ty ) [function, total, symbol(lookupTy)] - // ----------------------------------------------------- - rule lookupTy(_TY) => typeInfoVoidType [owise] // HACK + + // default rules (unused, only required for compilation of the base semantics) + rule lookupFunction(ty(TY)) => monoItemFn(symbol("** UNKNOWN FUNCTION **"), defId(TY), noBody ) [owise] + rule lookupAlloc(ID) => InvalidAlloc(ID) [owise] + rule lookupTy(_) => typeInfoFunType(" ** INVALID LOOKUP CALL **" ) [owise] ``` ```k diff --git a/kmir/src/kmir/kompile.py b/kmir/src/kmir/kompile.py index e6bcaaa33..cecd65215 100644 --- a/kmir/src/kmir/kompile.py +++ b/kmir/src/kmir/kompile.py @@ -7,18 +7,21 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -from pyk.kast.inner import KApply, KSort +from pyk.kast.inner import KApply, KSort, KToken, KVariable from pyk.kast.prelude.kint import intToken from pyk.kast.prelude.string import stringToken +from pyk.kore.syntax import App, EVar, SortApp, String, Symbol, SymbolDecl from .build import HASKELL_DEF_DIR, LLVM_DEF_DIR, LLVM_LIB_DIR from .kmir import KMIR if TYPE_CHECKING: + from collections.abc import Sequence from pathlib import Path from typing import Any, Final from pyk.kast import KInner + from pyk.kore.syntax import Axiom, Sentence from .smir import SMIRInfo @@ -117,7 +120,7 @@ def kompile_smir( target_dir.mkdir(parents=True, exist_ok=True) kmir = KMIR(HASKELL_DEF_DIR) - rules = _make_kore_rules(kmir, smir_info) + rules = make_kore_rules(kmir, smir_info) _LOGGER.info(f'Generated {len(rules)} function equations to add to `definition.kore') if symbolic: @@ -216,35 +219,142 @@ def kompile_smir( return KompiledConcrete(llvm_dir=target_llvm_path) -def _make_kore_rules(kmir: KMIR, smir_info: SMIRInfo) -> list[str]: # generates kore syntax directly as string +def _make_stratified_rules( + kmir: KMIR, + fun: str, + arg_sort: str, + result_sort: str, + id_cons: str, + assocs: list[tuple[int, KInner]], + not_found: KInner, + strata: int = 10, +) -> Sequence[Sentence]: + from pyk.kore.prelude import int_dv + from pyk.kore.rule import FunctionRule + + int_eqls = "Lbl'UndsEqlsEqls'Int'Unds'" + int_tmod = "Lbl'UndsPerc'Int'Unds'" + + arg_sort_kore = SortApp('Sort' + arg_sort) + int_sort_kore = SortApp('SortInt') + result_sort_kore = SortApp('Sort' + result_sort) + id_cons_kore = 'Lbl' + id_cons + + declarations = [ + # declare stratified functions + SymbolDecl( + symbol=Symbol('Lbl' + fun + str(i)), + param_sorts=(int_sort_kore,), + sort=result_sort_kore, + attrs=( + App('function'), + App('total'), + ), + ) + for i in range(strata) + ] + dispatch = [ + # define dispatch equations to stratified functions + # f'rule {fun}({id_cons}(N) => {fun}{i}(N)) requires N /Int {strata} ==Int {i}' + FunctionRule( + App('Lbl' + fun, (), (App(id_cons_kore, (), (EVar('VarN', int_sort_kore),)),)), + App('Lbl' + fun + str(i), (), (EVar('VarN', int_sort_kore),)), + App(int_eqls, (), [App(int_tmod, (), [EVar('VarN', int_sort_kore), int_dv(strata)]), int_dv(i)]), + None, + result_sort_kore, + (arg_sort_kore,), + None, + 0, + f'{fun}{i}-dispatch', + f'{fun}{i}-dispatch', + ) + .to_axiom() + .let_attrs((App("UNIQUE'Unds'ID", (), (String(f'{fun}{i}-dispatch'),)),)) + for i in range(strata) + ] + defaults = [ + # define dispatch equations to stratified functions + # f'rule {fun}{i}(N) => {default} [owise]' + FunctionRule( + App('Lbl' + fun + str(i), (), (EVar('VarN', SortApp('SortInt')),)), + kmir.kast_to_kore(not_found, KSort(result_sort)), + None, + None, + result_sort_kore, + (int_sort_kore,), + None, + 200, + f'{fun}{i}-default', + f'{fun}{i}-default', + ) + .to_axiom() + .let_attrs((App('owise'), App("UNIQUE'Unds'ID", (), (String(f'{fun}{i}-default'),)))) + for i in range(strata) + ] equations = [] + for i, result in assocs: + m = i % strata + equations.append( + _mk_equation(kmir, fun + str(m), intToken(i), 'Int', result, result_sort).let_attrs( + (App("UNIQUE'Unds'ID", (), (String(f'{fun}{m}-{i}-generated'),)),) + ) + ) + return [*declarations, *dispatch, *defaults, *equations] + +def make_kore_rules(kmir: KMIR, smir_info: SMIRInfo) -> Sequence[Sentence]: # kprint tool is too chatty kprint_logger = logging.getLogger('pyk.ktool.kprint') kprint_logger.setLevel(logging.WARNING) + unknown_function = KApply( + 'MonoItemKind::MonoItemFn', + ( + KApply('symbol(_)_LIB_Symbol_String', (KToken('"** UNKNOWN FUNCTION **"', KSort('String')),)), + KApply('defId(_)_BODY_DefId_Int', (KVariable('TY', KSort('Int')),)), + KApply('noBody_BODY_MaybeBody', ()), + ), + ) + default_function = _mk_equation( + kmir, 'lookupFunction', KApply('ty', (KVariable('TY'),)), 'Ty', unknown_function, 'MonoItemKind' + ).let_attrs(((App('owise')),)) + + equations: list[Axiom] = [default_function] + for fty, kind in _functions(kmir, smir_info).items(): equations.append( _mk_equation(kmir, 'lookupFunction', KApply('ty', (intToken(fty),)), 'Ty', kind, 'MonoItemKind') ) - types: set[KInner] = set() - for type in smir_info._smir['types']: - parse_result = kmir.parser.parse_mir_json(type, 'TypeMapping') - assert parse_result is not None - type_mapping, _ = parse_result - assert isinstance(type_mapping, KApply) and len(type_mapping.args) == 2 - ty, tyinfo = type_mapping.args - if ty in types: - raise ValueError(f'Key collision in type map: {ty}') - types.add(ty) - equations.append(_mk_equation(kmir, 'lookupTy', ty, 'Ty', tyinfo, 'TypeInfo')) + # stratify type and alloc lookups + def get_int_arg(app: KInner) -> int: + match app: + case KApply(_, args=(KToken(token=int_arg, sort=KSort('Int')),)): + return int(int_arg) + case _: + raise Exception(f'Cannot extract int arg from {app}') + + invalid_type = KApply('TypeInfo::VoidType', ()) + parsed_types = [kmir.parser.parse_mir_json(type, 'TypeMapping') for type in smir_info._smir['types']] + type_mappings = [type_mapping for type_mapping, _ in (result for result in parsed_types if result is not None)] + + type_assocs = [ + (get_int_arg(ty), info) + for ty, info in (type_mapping.args for type_mapping in type_mappings if isinstance(type_mapping, KApply)) + ] - for alloc in smir_info._smir['allocs']: - alloc_id, value = _decode_alloc(smir_info=smir_info, raw_alloc=alloc) - equations.append(_mk_equation(kmir, 'lookupAlloc', alloc_id, 'AllocId', value, 'Evaluation')) + type_equations = _make_stratified_rules(kmir, 'lookupTy', 'Ty', 'TypeInfo', 'ty', type_assocs, invalid_type) - return equations + invalid_alloc_n = KApply( + 'InvalidAlloc(_)_RT-VALUE-SYNTAX_Evaluation_AllocId', (KApply('allocId', (KVariable('N'),)),) + ) + decoded_allocs = [_decode_alloc(smir_info=smir_info, raw_alloc=alloc) for alloc in smir_info._smir['allocs']] + allocs = [(get_int_arg(alloc_id), value) for (alloc_id, value) in decoded_allocs] + alloc_equations = _make_stratified_rules( + kmir, 'lookupAlloc', 'AllocId', 'Evaluation', 'allocId', allocs, invalid_alloc_n + ) + + return [*equations, *type_equations, *alloc_equations] def _functions(kmir: KMIR, smir_info: SMIRInfo) -> dict[int, KInner]: @@ -275,7 +385,7 @@ def _functions(kmir: KMIR, smir_info: SMIRInfo) -> dict[int, KInner]: return functions -def _mk_equation(kmir: KMIR, fun: str, arg: KInner, arg_sort: str, result: KInner, result_sort: str) -> str: +def _mk_equation(kmir: KMIR, fun: str, arg: KInner, arg_sort: str, result: KInner, result_sort: str) -> Axiom: from pyk.kore.rule import FunctionRule from pyk.kore.syntax import App, SortApp @@ -296,17 +406,7 @@ def _mk_equation(kmir: KMIR, fun: str, arg: KInner, arg_sort: str, result: KInne uid='fubar', label='fubaz', ) - return '\n'.join( - [ - '', - '', - ( - f'// {fun}({kmir.pretty_print(arg)})' - + (f' => {kmir.pretty_print(result)}' if len(kmir.pretty_print(result)) < 1000 else '') - ), - rule.to_axiom().text, - ] - ) + return rule.to_axiom() def _decode_alloc(smir_info: SMIRInfo, raw_alloc: Any) -> tuple[KInner, KInner]: @@ -326,7 +426,7 @@ def _decode_alloc(smir_info: SMIRInfo, raw_alloc: Any) -> tuple[KInner, KInner]: return alloc_id_term, value.to_kast() -def _insert_rules_and_write(input_file: Path, rules: list[str], output_file: Path) -> None: +def _insert_rules_and_write(input_file: Path, rules: Sequence[Sentence], output_file: Path) -> None: with open(input_file, 'r') as f: lines = f.readlines() @@ -337,7 +437,7 @@ def _insert_rules_and_write(input_file: Path, rules: list[str], output_file: Pat # Insert rules before the endmodule line new_lines.append(f'\n// Generated from file {input_file}\n\n') - new_lines.extend(rules) + new_lines.extend([ax.text + '\n' for ax in rules]) new_lines.append('\n' + last_line) # Write to output file From 4da04400fc8479cb95ebe7c0ba00701dffdafcf1 Mon Sep 17 00:00:00 2001 From: Jost Berthold Date: Thu, 4 Dec 2025 12:35:16 +1100 Subject: [PATCH 2/2] Special fix-up rule to apply field projection on array head elements (#876) When using pointer offsets followed by dereferencing, the returned result is an array (subslice of the original). In some cases, the value is used as a single element, for instance by projecting out a field. This PR adds a special rule for the field projection case to proceed using the head element in those cases. Related: #771 --- kmir/src/kmir/kdist/mir-semantics/rt/data.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/data.md b/kmir/src/kmir/kdist/mir-semantics/rt/data.md index 29ca416c8..f392edda3 100644 --- a/kmir/src/kmir/kdist/mir-semantics/rt/data.md +++ b/kmir/src/kmir/kdist/mir-semantics/rt/data.md @@ -469,6 +469,21 @@ The context is populated with the correct field access data, so that write-backs [preserves-definedness, priority(100)] ``` +A somewhat dual case to this rule can occur when a pointer into an array of data elements has been offset and is then dereferenced. +The dereferenced pointer may either point to a subslice or to a single element (depending on context). +Therefore, a field projection may be found which has to be applied to the head element of an array. +The following rule resolves this situation by using the head element. + +```k + rule #traverseProjection( + DEST, + Range(ListItem(Aggregate(_, _) #as VALUE) _REST:List), + projectionElemField(IDX, TY) PROJS, + CTXTS + ) + => #traverseProjection(DEST, VALUE, projectionElemField(IDX, TY) PROJS, CTXTS) ... // TODO mark context? + [preserves-definedness, priority(100)] +``` #### Unions ```k