diff --git a/Strata/Languages/FineGrainLaurel/Elaborate.lean b/Strata/Languages/FineGrainLaurel/Elaborate.lean
new file mode 100644
index 0000000000..fc70385741
--- /dev/null
+++ b/Strata/Languages/FineGrainLaurel/Elaborate.lean
@@ -0,0 +1,1662 @@
+/-
+  Copyright Strata Contributors
+  SPDX-License-Identifier: Apache-2.0 OR MIT
+-/
+module
+
+import Strata.Languages.FineGrainLaurel.FineGrainLaurel
+public import Strata.Languages.Laurel.Laurel
+public import Strata.Languages.Laurel.HeapParameterizationConstants
+public import Strata.Languages.Laurel.CoreDefinitionsForLaurel
+
+/-!
+# Pass 3: Elaboration
+
+Elaboration transforms Laurel programs (impure CBV, effects implicit) into
+Laurel programs where effects are explicit via calling conventions. The
+theoretical foundation is **Fine-Grain Call-By-Value** (FGCBV) with graded
+effects and bidirectional typing.
+
+## Why FGCBV?
+
+In plain CBV, every expression can have effects. You cannot tell by looking
+at `f(x, g(y))` whether `g(y)` allocates, throws, or is pure. This matters
+for verification because the calling convention depends on it: a pure call
+returns a value directly; an effectful call returns through output parameters
+(heap, error status).
+
+FGCBV separates **values** (pure, duplicable) from **producers** (effectful,
+sequenced). A producer must be explicitly sequenced — this makes the
+elaborator syntax-directed. At every point, the structure of the term tells
+you whether you are looking at a value or a producer.
+
+## Bidirectional Typing
+
+The elaborator has three mutually recursive functions:
+
+- `synthValue`: value synthesis — literals, variables, pure calls, field access
+- `checkValue`: value checking — synthesize then coerce (the ONE place subsumption lives)
+- `checkProducer`: producer checking — if, while, assign, block, exit, assert, etc.
+
+Values synthesize their types bottom-up. Producers are checked against an
+ambient grade and output type top-down. The mode discipline guarantees
+deterministic choices at every point.
+
+## Graded Effects
+
+Each producer carries a grade from `{pure, proc, err, heap, heapErr}`. The
+grade determines the calling convention (extra heap parameters, error outputs).
+Grade inference proceeds by coinduction over the call graph: try each grade
+from `pure` upward, the first that succeeds is the procedure's grade.
+
+## Two Passes
+
+1. **Grade inference** (coinductive fixpoint): for each user procedure, find
+   the minimal grade at which elaboration succeeds.
+2. **Term production**: elaborate each procedure at its inferred grade,
+   project the FGCBV term back to Laurel statements.
+-/
+
+namespace Strata.FineGrainLaurel
+open Strata.Laurel
+public section
+
+/-! ## Internal Types
+
+Elaboration builds its own environment from `Laurel.Program` declarations.
+Ideally call sites would carry callee signatures directly (no lookup needed),
+but the Laurel AST uses string-named `StaticCall` nodes. -/
+
+/-- Elaboration's internal function signature (built from Laurel.Procedure declarations). -/
+structure FuncSig where
+  /-- Procedure name (string, matching StaticCall callee names). -/
+  name : String
+  /-- Input parameters as (name, type) pairs. -/
+  params : List (String × HighType)
+  /-- Return type: type of the first declared output (`TVoid` if there are none); for datatype constructors, the datatype. -/
+  returnType : HighType
+
+instance : Inhabited FuncSig where
+  default := { name := "", params := [], returnType := .TCore "Any" }
+
+/-- What a name resolves to in Elaboration's type environment. -/
+inductive NameInfo where
+  /-- A callable procedure with its signature. -/
+  | function (sig : FuncSig)
+  /-- A variable binding with its type. -/
+  | variable (ty : HighType)
+
+instance : Inhabited NameInfo where
+  default := .variable (.TCore "Any")
+
+/-- The typing environment: maps names to their info and class names to field lists. -/
+structure ElabTypeEnv where
+  /-- All known names (procedures, variables, datatype constructors). -/
+  names : Std.HashMap String NameInfo := {}
+  /-- Class fields: class name -> list of (field name, field type). -/
+  classFields : Std.HashMap String (List (String × HighType)) := {}
+  deriving Inhabited
+
+/-- Builds the type environment from a Laurel program's declarations. Scans all
+    procedures (user, then runtime) for signatures and all types for class fields / datatype constructors; on a name clash the later insert wins. -/
+def buildElabEnvFromProgram (program : Laurel.Program) (runtime : Laurel.Program := default) : ElabTypeEnv := Id.run do
+  let mut names : Std.HashMap String NameInfo := {}
+  let mut classFields : Std.HashMap String (List (String × HighType)) := {}
+  for proc in program.staticProcedures ++ runtime.staticProcedures do
+    let params := proc.inputs.map fun p => (p.name.text, p.type.val)
+    let retTy := match proc.outputs.head? with
+      | some o => o.type.val | none => HighType.TVoid
+    names := names.insert proc.name.text (.function { name := proc.name.text, params, returnType := retTy })
+  for td in program.types ++ runtime.types do
+    match td with
+    | .Composite ct =>
+      let fields := ct.fields.map fun f => (f.name.text, f.type.val)
+      classFields := classFields.insert ct.name.text fields
+    | .Datatype dt =>
+      for ctor in dt.constructors do
+        let ctorParams := ctor.args.map fun p => (p.name.text, p.type.val)
+        let retTy := HighType.UserDefined { text := dt.name.text, uniqueId := none }
+        names := names.insert ctor.name.text (.function { name := ctor.name.text, params := ctorParams, returnType := retTy })
+    | .Constrained _ => pure ()
+  { names, classFields }
+
+def mkLaurel (md : Imperative.MetaData Core.Expression) (e : StmtExpr) : StmtExprMd := -- pairs a statement/expression with its metadata
+  { val := e, md := md }
+def mkHighTypeMd (md : Imperative.MetaData Core.Expression) (ty : HighType) : HighTypeMd := -- pairs a type with its metadata
+  { val := ty, md := md }
+
+/-! ## The Grade Monoid
+
+Grades classify which effects a producer performs. The monoid structure
+ensures compositionality: sequencing two producers joins their grades.
+The left residual `d \ e` ("what grade remains for the continuation after
+a call at grade `d` within ambient grade `e`") drives grade inference —
+if `d \ e` is undefined (`d ≰ e`, which includes the incomparable pair `err`/`heap`), elaboration fails and the grade is
+pushed upward. -/
+
+/-- The effect grade lattice: pure < proc < {err, heap} < heapErr. -/
+inductive Grade where
+  /-- No effects. Value-level `staticCall`, no extra params. -/
+  | pure
+  /-- Effectful but no error or heap. Outputs: `[result]`. -/
+  | proc
+  /-- May throw. Outputs: `[result, maybe_except]`. -/
+  | err
+  /-- Reads/writes heap. Inputs: `[$heap]`. Outputs: `[$heap, result]`. -/
+  | heap
+  /-- Heap + error. Inputs: `[$heap]`. Outputs: `[$heap, result, maybe_except]`. -/
+  | heapErr
+  deriving Inhabited, BEq, Repr
+
+/-- Join (least upper bound) of two grades. Sequencing two producers joins their grades. -/
+def Grade.join : Grade → Grade → Grade
+  | .pure, e => e | e, .pure => e
+  | .proc, .proc => .proc
+  | .proc, .err => .err | .err, .proc => .err
+  | .proc, .heap => .heap | .heap, .proc => .heap
+  | .proc, .heapErr => .heapErr | .heapErr, .proc => .heapErr
+  | .err, .err => .err
+  | .err, .heap => .heapErr | .heap, .err => .heapErr
+  | .err, .heapErr => .heapErr | .heapErr, .err => .heapErr
+  | .heap, .heap => .heap
+  | .heap, .heapErr => .heapErr | .heapErr, .heap => .heapErr
+  | .heapErr, .heapErr => .heapErr
+
+/-- Left residual: `d\e` = grade for the continuation after a call at grade `d`
+    within ambient grade `e`. Returns `none` if `d ≰ e` (elaboration fails); `err` and `heap` are incomparable.
+
+    Satisfies the residuation law for an idempotent semilattice:
+    `d ⊔ x ≤ e` iff `x ≤ d\e`. Since `⊔` is idempotent (join),
+    the largest `x` with `d ⊔ x ≤ e` is `e` itself (when `d ≤ e`).
+    So `d\e = e` whenever `d ≤ e`, and undefined otherwise.
+```
+d\e = e    if d ≤ e
+d\e = ⊥    otherwise
+```
+-/
+def Grade.leftResidual : Grade → Grade → Option Grade
+  | .pure, e => some e
+  | .proc, e => if e == .pure then none else some e
+  | .err, e => match e with | .err | .heapErr => some e | _ => none
+  | .heap, e => match e with | .heap | .heapErr => some e | _ => none
+  | .heapErr, .heapErr => some .heapErr
+  | _, _ => none
+
+/-! ## Type Erasure
+
+Elaboration operates on `LowType` — the erased version of `HighType`.
+User-defined types erase to `Composite` (they live on the heap). The
+subtyping/coercion system operates on `LowType` values. -/
+
+/-- The erased type system. User-defined types become `Composite` (heap-allocated).
+    Subsumption and coercion operate on `LowType` values. -/
+inductive LowType where
+  /-- Integer (`FGLValue.litInt` literals carry a Lean `Int`). -/
+  | TInt
+  /-- Boolean. -/
+  | TBool
+  /-- String. -/
+  | TString
+  /-- 64-bit float. -/
+  | TFloat64
+  /-- Unit/void. -/
+  | TVoid
+  /-- Named core type (Any, Error, Heap, Composite, ListAny, DictStrAny, etc.). -/
+  | TCore (name : String)
+  deriving Inhabited, Repr, BEq
+
+/-- Type erasure: HighType -> LowType. Primitives map directly, user-defined types
+    become Composite (except the built-in names matched below, which keep their own `TCore` name), unknown/complex types become Any. -/
+def eraseType : HighType → LowType
+  | .TInt => .TInt | .TBool => .TBool | .TString => .TString
+  | .TFloat64 => .TFloat64 | .TVoid => .TVoid | .TCore n => .TCore n
+  | .UserDefined id => match id.text with
+    | "Any" => .TCore "Any" | "Error" => .TCore "Error"
+    | "ListAny" => .TCore "ListAny" | "DictStrAny" => .TCore "DictStrAny"
+    | "OptionInt" => .TCore "OptionInt"
+    | "Box" => .TCore "Box" | "Field" => .TCore "Field" | "TypeTag" => .TCore "TypeTag"
+    | _ => .TCore "Composite"
+  | .THeap => .TCore "Heap"
+  | .TReal => .TCore "real" | .TTypedField _ => .TCore "Field"
+  | .TSet _ | .TMap _ _ | .Applied _ _ | .Intersection _ | .Unknown => .TCore "Any"
+  | .Pure _ => .TCore "Composite"
+
+/-- Section of erasure (total; `eraseType (liftType t) = t`): re-embeds a LowType as a HighType for env extension. -/
+def liftType : LowType → HighType
+  | .TInt => .TInt | .TBool => .TBool | .TString => .TString
+  | .TFloat64 => .TFloat64 | .TVoid => .TVoid | .TCore n => .TCore n
+
+/-! ## FGL Terms
+
+The intermediate representation between Laurel input and Laurel output.
+Values are pure (can appear in any context). Producers are effectful
+(must be sequenced). Every constructor except `FGLProducer.skip` carries source metadata so
+provenance is preserved through elaboration. -/
+
+abbrev Md := Imperative.MetaData Core.Expression
+
+/-- A pure value in the FGCBV intermediate term. Can appear in any context.
+    Every constructor carries source metadata for provenance. -/
+inductive FGLValue where
+  /-- Integer literal. -/
+  | litInt (md : Md) (n : Int)
+  /-- Boolean literal. -/
+  | litBool (md : Md) (b : Bool)
+  /-- String literal. -/
+  | litString (md : Md) (s : String)
+  /-- Variable reference. -/
+  | var (md : Md) (name : String)
+  /-- Coercion: int → Any. -/
+  | fromInt (md : Md) (inner : FGLValue)
+  /-- Coercion: string → Any. -/
+  | fromStr (md : Md) (inner : FGLValue)
+  /-- Coercion: bool → Any. -/
+  | fromBool (md : Md) (inner : FGLValue)
+  /-- Coercion: float → Any. -/
+  | fromFloat (md : Md) (inner : FGLValue)
+  /-- Coercion: Composite → Any. -/
+  | fromComposite (md : Md) (inner : FGLValue)
+  /-- Coercion: ListAny → Any. -/
+  | fromListAny (md : Md) (inner : FGLValue)
+  /-- Coercion: DictStrAny → Any. -/
+  | fromDictStrAny (md : Md) (inner : FGLValue)
+  /-- Coercion: None → Any. -/
+  | fromNone (md : Md)
+  /-- Field access (pre-heap-resolution). -/
+  | fieldAccess (md : Md) (obj : FGLValue) (field : String)
+  /-- Pure function call. -/
+  | staticCall (md : Md) (name : String) (args : List FGLValue)
+  deriving Inhabited
+
+def FGLValue.getMd : FGLValue → Md -- the metadata stored in the outermost constructor
+  | .litInt md _ | .litBool md _ | .litString md _ | .var md _
+  | .fromInt md _ | .fromStr md _ | .fromBool md _ | .fromFloat md _
+  | .fromComposite md _ | .fromListAny md _ | .fromDictStrAny md _ | .fromNone md
+  | .fieldAccess md _ _ | .staticCall md _ _ => md
+
+/-- An effectful producer in the FGCBV intermediate term. Must be sequenced.
+    Every form except the terminal `produce`, `exit` and `skip` carries a continuation (`body`/`after`) — the CPS structure
+    makes projection to Laurel statements trivial. -/
+inductive FGLProducer where
+  /-- Return a value (terminal — no continuation). -/
+  | produce (md : Md) (v : FGLValue)
+  /-- Assign to an existing variable, then continue. RHS is a producer whose
+      resolved value is assigned to target. -/
+  | assign (md : Md) (target : FGLValue) (val : FGLProducer) (body : FGLProducer)
+  /-- Declare a local variable, then continue in extended scope. Init is a
+      producer whose resolved value initializes the variable. -/
+  | varDecl (md : Md) (name : String) (ty : LowType) (init : FGLProducer) (body : FGLProducer)
+  /-- Conditional: check condition, branch, then continue after. -/
+  | ifThenElse (md : Md) (cond : FGLValue) (thn : FGLProducer) (els : FGLProducer) (after : FGLProducer)
+  /-- Loop: check condition, iterate body, then continue after. -/
+  | whileLoop (md : Md) (cond : FGLValue) (body : FGLProducer) (after : FGLProducer)
+  /-- Assert condition holds, then continue. -/
+  | assert (md : Md) (cond : FGLValue) (body : FGLProducer)
+  /-- Assume condition holds, then continue. -/
+  | assume (md : Md) (cond : FGLValue) (body : FGLProducer)
+  /-- Effectful call: bind outputs, then continue in extended scope. -/
+  | procedureCall (md : Md) (callee : String) (args : List FGLValue)
+      (outputs : List (String × LowType)) (body : FGLProducer)
+  /-- Exit to enclosing labeled block (non-returning). -/
+  | exit (md : Md) (label : String)
+  /-- Labeled block: body may exit to label, then continue after. -/
+  | labeledBlock (md : Md) (label : String) (body : FGLProducer) (after : FGLProducer)
+  /-- Empty continuation (end of block). -/
+  | skip
+  deriving Inhabited
+
+-- ═══════════════════════════════════════════════════════════════════════════════
+-- Monad
+-- ═══════════════════════════════════════════════════════════════════════════════
+
+/-- Reader environment for elaboration. Carries the type environment, program,
+    runtime, inferred grades, and current procedure's input list (for hole args). -/
+structure ElabEnv where
+  /-- The typing context (names + class fields). -/
+  typeEnv : ElabTypeEnv
+  /-- The user program being elaborated. -/
+  program : Laurel.Program
+  /-- The runtime prelude (builtins, data structure operations). -/
+  runtime : Laurel.Program := default
+  /-- Inferred grades for all procedures. -/
+  procGrades : Std.HashMap String Grade := {}
+  /-- Current procedure's input params (used as hole arguments). -/
+  procInputs : List (String × HighType) := []
+
+/-- Mutable state for elaboration: fresh name counter, current heap variable name,
+    and collectors for box constructors and holes used (emitted as declarations). -/
+structure ElabState where
+  /-- Counter for generating fresh variable names. -/
+  freshCounter : Nat := 0
+  /-- Current heap variable name (updated after each heap-writing call). -/
+  heapVar : Option String := none
+  /-- Box constructors used, as (constructor name, destructor name, boxed type); emitted as datatype constructors in output. -/
+  usedBoxConstructors : List (String × String × HighType) := []
+  /-- Hole functions used, as (name, flag, result type): flag is `true` for `hole` (from `checkValue`), `false` for `havoc` (from `synthValueFieldSelect`); emitted as opaque procedure declarations. -/
+  usedHoles : List (String × Bool × HighType) := []
+
+abbrev ElabM := ReaderT ElabEnv (StateT ElabState Option)
+
+private def freshVar (pfx : String := "tmp") : ElabM String := do -- yields `pfx$<counter>` and bumps the counter
+  let s ← get; set { s with freshCounter := s.freshCounter + 1 }; pure s!"{pfx}${s.freshCounter}"
+
+-- ═══════════════════════════════════════════════════════════════════════════════
+-- Box protocol (type-directed)
+-- Architecture §"Heap Field Access"
+-- ═══════════════════════════════════════════════════════════════════════════════
+
+def boxConstructorName (ty : HighType) : String := -- name of the Box constructor chosen for a field of type `ty`
+  match ty with
+  | .TInt => "BoxInt" | .TBool => "BoxBool" | .TFloat64 => "BoxFloat64"
+  | .TReal => "BoxReal" | .TString => "BoxString"
+  | .UserDefined _ => "BoxComposite"
+  | .TCore "Any" => "BoxAny"
+  | .TCore name => s!"Box..{name}"
+  | _ => "BoxComposite"
+
+def boxDestructorName (ty : HighType) : String := -- name of the Box destructor (unboxing function) chosen for type `ty`
+  match ty with
+  | .TInt => "Box..intVal!" | .TBool => "Box..boolVal!" | .TFloat64 => "Box..float64Val!"
+  | .TReal => "Box..realVal!" | .TString => "Box..stringVal!"
+  | .UserDefined _ => "Box..compositeVal!"
+  | .TCore "Any" => "Box..AnyVal!"
+  | .TCore name => s!"Box..{name}Val!"
+  | _ => "Box..compositeVal!"
+
+def boxFieldName (ty : HighType) : String := -- field name for type `ty`: the matching `boxDestructorName` without `Box..` and `!`
+  match ty with
+  | .TInt => "intVal" | .TBool => "boolVal" | .TFloat64 => "float64Val"
+  | .TReal => "realVal" | .TString => "stringVal"
+  | .UserDefined _ => "compositeVal"
+  | .TCore "Any" => "AnyVal"
+  | .TCore name => s!"{name}Val"
+  | _ => "compositeVal"
+
+def boxFieldType (ty : HighType) : HighType := -- every user-defined type is stored as `Composite`; other types are unchanged
+  match ty with
+  | .UserDefined _ => .UserDefined (Identifier.mk "Composite" none)
+  | other => other
+
+def recordBoxUse (ty : HighType) : ElabM Unit := do -- records (ctor, destructor, ty) once per constructor name
+  let ctor := boxConstructorName ty
+  let existing := (← get).usedBoxConstructors
+  unless existing.any (fun (c, _, _) => c == ctor) do
+    modify fun s => { s with usedBoxConstructors := s.usedBoxConstructors ++ [(ctor, boxDestructorName ty, ty)] }
+
+/-- Reads a runtime procedure's grade structurally from its signature: does it
+    have a Heap input? An Error output? The combination determines the grade
+    (with neither, `isFunctional` picks `pure` over `proc`). User procedure grades are inferred by coinduction, not read from signature. -/
+def gradeFromSignature (proc : Laurel.Procedure) : Grade :=
+  let hasError := proc.outputs.any fun o => eraseType o.type.val == .TCore "Error"
+  let hasHeap := proc.inputs.any fun i => eraseType i.type.val == .TCore "Heap"
+  match hasHeap, hasError with
+  | true, true => .heapErr
+  | true, false => .heap
+  | false, true => .err
+  | false, false => if proc.isFunctional then .pure else .proc
+
+-- ═══════════════════════════════════════════════════════════════════════════════
+-- Env helpers (lookups signal a miss with `failure`, i.e. `none` in the underlying `Option`)
+-- ═══════════════════════════════════════════════════════════════════════════════
+
+def lookupEnv (name : String) : ElabM NameInfo := do -- resolve a name; traces and fails when it is unbound
+  match (← read).typeEnv.names[name]? with | some info => pure info | none => dbg_trace s!"lookupEnv: {name} not found"; failure
+def extendEnv (name : String) (ty : HighType) (action : ElabM α) : ElabM α := -- run `action` with `name` bound as a variable of type `ty`
+  withReader (fun env => { env with typeEnv := { env.typeEnv with names := env.typeEnv.names.insert name (.variable ty) } }) action
+def lookupFuncSig (name : String) : ElabM FuncSig := do -- fails unless `name` is bound to a function signature
+  match (← read).typeEnv.names[name]? with | some (.function sig) => pure sig | _ => failure
+def lookupFieldType (className fieldName : String) : ElabM HighType := do -- fails when the class or the field is unknown
+  match (← read).typeEnv.classFields[className]? with
+  | some fields => match fields.find? (fun (n, _) => n == fieldName) with
+    | some (_, ty) => pure ty
+    | none => failure
+  | none => failure
+
+/-! ## HOAS Smart Constructors
+
+These construct effectful call nodes using higher-order abstract syntax:
+the continuation is a Lean function from fresh output variables to the
+body producer. This ensures output variables are always correctly scoped
+(extended in the environment before the body is elaborated). -/
+
+def mkEffectfulCall (md : Md) (callee : String) (args : List FGLValue)
+    (outputSpecs : List (String × HighType)) -- (fresh-name prefix, type) per output, in declaration order
+    (body : List FGLValue → ElabM FGLProducer) : ElabM FGLProducer := do
+  let mut names : List String := []
+  let mut lowOutputs : List (String × LowType) := []
+  for (pfx, ty) in outputSpecs do
+    let n ← freshVar pfx
+    names := names ++ [n]
+    lowOutputs := lowOutputs ++ [(n, eraseType ty)]
+  let vars := names.map (FGLValue.var md)
+  let cont ← names.zip (outputSpecs.map (·.2)) |>.foldr -- bind every output variable around the continuation
+    (fun (n, ty) acc => extendEnv n ty acc) (body vars)
+  pure (.procedureCall md callee args lowOutputs cont)
+
+def mkVarDecl (md : Md) (name : String) (ty : LowType) (init : FGLProducer)
+    (body : FGLValue → ElabM FGLProducer) : ElabM FGLProducer := do
+  let cont ← extendEnv name (liftType ty) (body (.var md name))
+  pure (.varDecl md name ty init cont)
+
+/-- Subgrading witness: `d ≤ e ↦ (pre, outs)`. Constructs a `procedureCall`
+    with the correct calling convention based on grade. If the expected result output is missing, the body receives `fromNone`.
+```
+d ≤ e ↦ (args_prepended, outputs_declared, resultIdx)
+
+pure:    ([],         [],                                     —)  — value-level, no procedureCall
+proc:    ([],         [result:B],                             0)
+err:     ([],         [result:B, except:Error],               0)
+heap:    ([heap_var], [heap:Heap, result:B],                  1)
+heapErr: ([heap_var], [heap:Heap, result:B, except:Error],    1)
+```
+-/
+def mkGradedCall (md : Md) (callee : String) (args : List FGLValue)
+    (declaredOutputs : List (String × HighType)) (callGrade : Grade)
+    (body : FGLValue → ElabM FGLProducer) : ElabM FGLProducer := do
+  let actualArgs ← if callGrade == .heap || callGrade == .heapErr then do
+      let hv := (← get).heapVar
+      let heapArg := match hv with | some h => FGLValue.var md h | none => FGLValue.var md "$heap"
+      pure (heapArg :: args)
+    else pure args
+  mkEffectfulCall md callee actualArgs declaredOutputs fun outs => do
+    if callGrade == .heap || callGrade == .heapErr then
+      match outs[0]? with -- the first output of a heap-graded call becomes the current heap variable
+      | some v => match v with | .var _ n => modify fun s => { s with heapVar := some n } | _ => pure ()
+      | none => pure ()
+    let resultVar := match callGrade with
+      | .heap | .heapErr => outs[1]?
+      | _ => outs[0]?
+    match resultVar with
+    | some rv => body rv
+    | none => body (.fromNone md)
+
+/-! ## Subsumption
+
+A subtyping judgment `A <= B` has a witness: a coercion function. Upward
+coercions (T <= Any) are value constructors (boxing). Downward coercions
+(Any <= T) are pure function calls (unboxing). `applySubtype` is called
+from `checkValue` (and once from `synthValueFieldSelect`, to view the receiver as `Composite`) — this is the bidirectional discipline. -/
+
+/-- The result of a subsumption check: identity (types equal), a coercion witness
+    (function to apply), or unrelated (no subtyping relationship). -/
+inductive CoercionResult where
+  /-- Types are equal — no coercion needed. -/
+  | refl
+  /-- Subtyping holds — apply this coercion function. -/
+  | coerce (w : Md → FGLValue → FGLValue)
+  /-- No subtyping relationship. -/
+  | unrelated
+  deriving Inhabited
+
+/-- Subtyping judgment `A ≤ B ↦ c` as a total case analysis: every `(A, B)` pair
+is decided. `.refl` when `A = B`; `.coerce w` when Python implicitly converts
+`A → B`, witnessed by one direct runtime function; `.unrelated` otherwise — a
+deliberate verdict, never a forgotten case. `TCore` names outside the finite set
+`eraseType` produces are `.unrelated` (sound default for an unknown type).
+```
+A ≤ A ↦ id  (reflexivity)
+
+box   T ≤ Any:      TInt↦fromInt  TBool↦fromBool  TString↦fromStr  TFloat64↦fromFloat
+                    Composite↦fromComposite  ListAny↦fromListAny
+                    DictStrAny↦fromDictStrAny  TVoid↦fromNone
+unbox Any ≤ T:      bool↦Any_to_bool  int↦Any..as_int!  str↦Any..as_string!  float↦Any..as_float!
+                    Composite↦Any..as_Composite!  DictStrAny↦Any..as_Dict!  ListAny↦Any..as_ListAny!
+truth T ≤ bool:     TInt↦int_to_bool  TString↦str_to_bool  TFloat64↦float_to_bool
+                    ListAny↦list_to_bool  DictStrAny↦dict_to_bool
+                    TVoid↦false  Composite↦true
+num   bool≤int≤float: TBool↦int bool_to_int   TInt↦float int_to_real
+                    TBool↦float bool_to_real
+```
+-/
+def subtype (actual expected : LowType) : CoercionResult :=
+  if actual == expected then .refl else match expected, actual with
+  -- box: T ≤ Any
+  | .TCore "Any", .TInt => .coerce (fun md => .fromInt md)
+  | .TCore "Any", .TBool => .coerce (fun md => .fromBool md)
+  | .TCore "Any", .TString => .coerce (fun md => .fromStr md)
+  | .TCore "Any", .TFloat64 => .coerce (fun md => .fromFloat md)
+  | .TCore "Any", .TCore "Composite" => .coerce (fun md => .fromComposite md)
+  | .TCore "Any", .TCore "ListAny" => .coerce (fun md => .fromListAny md)
+  | .TCore "Any", .TCore "DictStrAny" => .coerce (fun md => .fromDictStrAny md)
+  | .TCore "Any", .TVoid => .coerce (fun md _ => .fromNone md)
+  | .TCore "Any", _ => .unrelated
+  -- to bool: unbox from Any, else per-type truthiness
+  | .TBool, .TCore "Any" => .coerce (fun md v => .staticCall md "Any_to_bool" [v])
+  | .TBool, .TInt => .coerce (fun md v => .staticCall md "int_to_bool" [v])
+  | .TBool, .TString => .coerce (fun md v => .staticCall md "str_to_bool" [v])
+  | .TBool, .TFloat64 => .coerce (fun md v => .staticCall md "float_to_bool" [v])
+  | .TBool, .TCore "ListAny" => .coerce (fun md v => .staticCall md "list_to_bool" [v])
+  | .TBool, .TCore "DictStrAny" => .coerce (fun md v => .staticCall md "dict_to_bool" [v])
+  | .TBool, .TVoid => .coerce (fun md _ => .litBool md false)
+  | .TBool, .TCore "Composite" => .coerce (fun md _ => .litBool md true)
+  | .TBool, _ => .unrelated
+  -- to int: unbox from Any, else bool widening
+  | .TInt, .TCore "Any" => .coerce (fun md v => .staticCall md "Any..as_int!" [v])
+  | .TInt, .TBool => .coerce (fun md v => .staticCall md "bool_to_int" [v])
+  | .TInt, _ => .unrelated
+  -- to float: unbox from Any, else int/bool widening
+  | .TFloat64, .TCore "Any" => .coerce (fun md v => .staticCall md "Any..as_float!" [v])
+  | .TFloat64, .TInt => .coerce (fun md v => .staticCall md "int_to_real" [v])
+  | .TFloat64, .TBool => .coerce (fun md v => .staticCall md "bool_to_real" [v])
+  | .TFloat64, _ => .unrelated
+  -- to string: unbox from Any
+  | .TString, .TCore "Any" => .coerce (fun md v => .staticCall md "Any..as_string!" [v])
+  | .TString, _ => .unrelated
+  -- to container/Composite: unbox from Any
+  | .TCore "Composite", .TCore "Any" => .coerce (fun md v => .staticCall md "Any..as_Composite!" [v])
+  | .TCore "DictStrAny", .TCore "Any" => .coerce (fun md v => .staticCall md "Any..as_Dict!" [v])
+  | .TCore "ListAny", .TCore "Any" => .coerce (fun md v => .staticCall md "Any..as_ListAny!" [v])
+  | _, _ => .unrelated
+
+/-- Apply the coercion witness for `actual <= expected` to a value. Identity if equal; an `.unrelated` pair also returns the value unchanged (no failure is raised). -/
+def applySubtype (val : FGLValue) (actual expected : LowType) : FGLValue :=
+  match subtype actual expected with | .refl => val | .coerce c => c val.getMd val | .unrelated => val
+
+/-! ## The Translation ⟦·⟧ : Laurel → GFGL
+
+Three functions: synthValue (⟦·⟧⇒ᵥ), checkValue (⟦·⟧⇐ᵥ), checkProducer (⟦·⟧⇐ₚ).
+Entry point is checkProducer — every Laurel derivation maps to a GFGL producer.
+synthValue/checkValue are internal helpers for building value sub-terms.
+Producer synthesis (⟦·⟧⇒ₚ) is applied by inversion inside the call clause. -/
+
+-- Look up a proc's declared outputs: runtime procs verbatim (checked first); user procs rewritten per their inferred grade (default `pure`); fails if the callee is unknown. Not recursive, so not `partial`.
+def lookupProcOutputs (callee : String) : ElabM (List (String × HighType)) := do
+  let env ← read
+  let g := env.procGrades[callee]?.getD .pure
+  let findProc (procs : List Laurel.Procedure) : Option Laurel.Procedure :=
+    procs.find? (fun p => p.name.text == callee)
+  match findProc env.runtime.staticProcedures with
+  | some proc => pure (proc.outputs.map fun o => (o.name.text, o.type.val))
+  | none => match findProc env.program.staticProcedures with
+    | some proc =>
+      let resultOutputs := proc.outputs.filter fun o => eraseType o.type.val != .TCore "Error" -- declared Error outputs are replaced by `maybe_except` below
+      let resultList := resultOutputs.map fun o => (o.name.text, o.type.val)
+      match g with
+      | .heap => pure ([("$heap", .THeap)] ++ resultList)
+      | .heapErr => pure ([("$heap", .THeap)] ++ resultList ++ [("maybe_except", .TCore "Error")])
+      | .err => pure (resultList ++ [("maybe_except", .TCore "Error")])
+      | _ => pure (proc.outputs.map fun o => (o.name.text, o.type.val))
+    | none => failure
+
+-- ═══════════════════════════════════════════════════════════════════════════════
+-- The Translation ⟦·⟧ : Laurel → GFGL
+--
+-- Three functions: synthValue (⟦·⟧⇒ᵥ), checkValue (⟦·⟧⇐ᵥ), checkProducer (⟦·⟧⇐ₚ)
+-- Entry point is checkProducer. synthValue/checkValue are internal helpers.
+-- Producer synthesis (⟦·⟧⇒ₚ) is applied by inversion inside the call clause.
+-- ═══════════════════════════════════════════════════════════════════════════════ + +mutual + +/-- ⟦·⟧⇒ᵥ (literal): +``` +D :: Γ ⊢ n : int [lit] + + ↦ + +⟦D⟧⇒ᵥ :: ⟦Γ⟧ ⊢ litInt n ⇒ TInt [litInt] +``` +(analogous for bool, string) +-/ +partial def synthValueLiteral (md : Md) (expr : StmtExpr) : Option (FGLValue × HighType) := + match expr with + | .LiteralInt n => some (.litInt md n, .TInt) + | .LiteralBool b => some (.litBool md b, .TBool) + | .LiteralString s => some (.litString md s, .TString) + | _ => none + +/-- ⟦·⟧⇒ᵥ (variable): +``` +D :: Γ ⊢ x : A [var, (x:A) ∈ Γ] + + ↦ + +⟦D⟧⇒ᵥ :: ⟦Γ⟧ ⊢ var x ⇒ ⟦A⟧ [var, (x:⟦A⟧) ∈ ⟦Γ⟧] +``` +-/ +partial def synthValueVar (md : Md) (id : Identifier) : ElabM (FGLValue × HighType) := do + match (← lookupEnv id.text) with + | .variable ty => pure (.var md id.text, ty) + | _ => dbg_trace s!"synthValueVar: {id.text} not a variable"; failure + +/-- ⟦·⟧⇒ᵥ (field access): +``` +D :: Γ ⊢ obj.f : T [fieldSelect] +└─ D_obj :: Γ ⊢ obj : C + + ↦ precondition: ($heap : Heap) ∈ ⟦Γ⟧ + +⟦D⟧⇒ᵥ :: ⟦Γ⟧ ⊢ functionCall unbox_T [functionCall readField [$heap, V_obj, $field.C.f]] ⇒ ⟦T⟧ [functionCall] +└─ ⟦Γ⟧ ⊢ functionCall readField [$heap, V_obj, $field.C.f] ⇐ Box [subsumption] + ├─ ⟦Γ⟧ ⊢ functionCall readField [$heap, V_obj, $field.C.f] ⇒ Box [functionCall] + │ ├─ ⟦Γ⟧ ⊢ $heap ⇐ Heap [subsumption] + │ │ ├─ ⟦Γ⟧ ⊢ $heap ⇒ Heap [var] + │ │ └─ Heap ≤ Heap ↦ id + │ ├─ ⟦D_obj⟧⇐ᵥ :: ⟦Γ⟧ ⊢ V_obj ⇐ Composite [subsumption] + │ │ ├─ ⟦D_obj⟧⇒ᵥ :: ⟦Γ⟧ ⊢ V_obj ⇒ Composite (since ⟦C⟧ = Composite for user-defined C) + │ │ └─ Composite ≤ Composite ↦ id + │ └─ ⟦Γ⟧ ⊢ functionCall $field.C.f [] ⇐ Field [subsumption] + │ ├─ ⟦Γ⟧ ⊢ functionCall $field.C.f [] ⇒ Field [functionCall] + │ └─ Field ≤ Field ↦ id + └─ Box ≤ Box ↦ id +``` +-/ +partial def synthValueFieldSelect (md : Md) (obj : StmtExprMd) (field : Identifier) : ElabM (FGLValue × HighType) := do + let (ov, objTy) ← synthValue obj + match (← get).heapVar with + | some hv => + let owner := match objTy with | 
.UserDefined id => some id.text | _ => none + match owner with + | some cn => + match (← read).typeEnv.classFields[cn]? with + | some _ => + let fieldTy ← lookupFieldType cn field.text + recordBoxUse fieldTy + let qualifiedName := "$field." ++ cn ++ "." ++ field.text + let compositeObj := applySubtype ov (eraseType objTy) (.TCore "Composite") + let read := FGLValue.staticCall md "readField" [.var md hv, compositeObj, .staticCall md qualifiedName []] + pure (.staticCall md (boxDestructorName fieldTy) [read], fieldTy) + | none => + let hv ← freshVar "havoc" + modify fun s => { s with usedHoles := s.usedHoles ++ [(hv, false, .TCore "Any")] } + pure (.staticCall md hv [], .TCore "Any") + | none => + let hv ← freshVar "havoc" + modify fun s => { s with usedHoles := s.usedHoles ++ [(hv, false, .TCore "Any")] } + pure (.staticCall md hv [], .TCore "Any") + | none => failure + +/-- ⟦·⟧⇒ᵥ (pure call): +``` +D :: Γ ⊢ f(e₁,…,eₙ) : B [call, f : (Aᵢ) → B & pure] +└─ D_i :: Γ ⊢ eᵢ : Aᵢ (for each i) + + ↦ + +⟦D⟧⇒ᵥ :: ⟦Γ⟧ ⊢ functionCall f [V₁,…,Vₙ] ⇒ ⟦B⟧ [functionCall] +└─ ⟦D_i⟧⇐ᵥ :: ⟦Γ⟧ ⊢ Vᵢ ⇐ ⟦Aᵢ⟧ (for each i) [subsumption] + ├─ ⟦D_i⟧⇒ᵥ :: ⟦Γ⟧ ⊢ Vᵢ ⇒ Bᵢ (Bᵢ discovered by recursive synthValue) + └─ Bᵢ ≤ ⟦Aᵢ⟧ ↦ cᵢ +``` +-/ +partial def synthValueStaticCall (md : Md) (callee : Identifier) (args : List StmtExprMd) : ElabM (FGLValue × HighType) := do + -- A name carrying a function signature but no explicit procedure grade is pure: + -- datatype constructors (from_None, from_int, ...) and pure runtime functions + -- live in typeEnv.names but not in procGrades. Default to pure, as elaborateCall + -- and lookupProcOutputs do; only a name graded above pure is rejected here. + let g := (← read).procGrades[callee.text]?.getD .pure + guard (g == .pure) + let sig ← lookupFuncSig callee.text + let checkedArgs ← checkArgValues args sig.params + pure (.staticCall md callee.text checkedArgs, sig.returnType) + +/-- ⟦·⟧⇒ᵥ: Value synthesis. Dispatches to clause helpers. 
-/ +partial def synthValue (expr : StmtExprMd) : ElabM (FGLValue × HighType) := do + let md := expr.md + match expr.val with + | .LiteralInt _ | .LiteralBool _ | .LiteralString _ => + match synthValueLiteral md expr.val with + | some r => pure r + | none => failure + | .Identifier id => synthValueVar md id + | .FieldSelect obj field => synthValueFieldSelect md obj field + | .StaticCall callee args => synthValueStaticCall md callee args + | _ => failure + +/-- Helper: check a list of arguments as values against parameter types. -/ +partial def checkArgValues (args : List StmtExprMd) (params : List (String × HighType)) : ElabM (List FGLValue) := do + match args, params with + | [], _ => pure [] + | arg :: rest, (_, pty) :: prest => do + let v ← checkValue arg pty + let vs ← checkArgValues rest prest + pure (v :: vs) + | _ :: _, [] => failure + +/-- ⟦·⟧⇐ᵥ: Value checking. Synthesizes then applies subtyping coercion. +``` +⟦D⟧⇐ᵥ (deterministic hole) :: ⟦Γ⟧ ⊢ functionCall hole_N [input₁,...,inputₖ] ⇐ ⟦A⟧ [functionCall] +└─ (hole_N : (⟦T₁⟧,...,⟦Tₖ⟧) → ⟦A⟧ & pure) ∈ ⟦Γ⟧ +``` +-/ +partial def checkValue (expr : StmtExprMd) (expected : HighType) : ElabM FGLValue := do + let md := expr.md + match expr.val with + | .Hole _ _ => + -- A hole in pure value position (a contract, or an argument of a pure call) + -- denotes a deterministic uninterpreted function of the procedure's inputs: + -- nondeterminism is meaningless in a pure value, so even a hole Translation + -- marked nondeterministic (e.g. an unresolved `re.search(...)` inside a + -- `requires`) is elaborated here as the deterministic `hole_N(inputs)`. This + -- keeps the contract well-typed; the caller obligation is sound but + -- uninterpretable (verification stays inconclusive, never unsound). 
+ let hv ← freshVar "hole" + let args := (← read).procInputs.map fun (name, _) => FGLValue.var md name + modify fun s => { s with usedHoles := s.usedHoles ++ [(hv, true, expected)] } + pure (.staticCall md hv args) + | _ => + let (val, actual) ← synthValue expr + pure (applySubtype val (eraseType actual) (eraseType expected)) + +/-- ⟦·⟧⇐ₚ*: Check a list of statements as a producer (list extension). -/ +partial def checkProducers (stmts : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + match stmts with + | [] => pure .skip + | stmt :: rest => checkProducer stmt rest retTy grade + +/-- ⟦·⟧⇐ₚ (if): +``` +D :: Γ ⊢ (if c then t else f); k : A [if] +├─ D_c :: Γ ⊢ c : bool +├─ D_t :: Γ ⊢ t : A +├─ D_f :: Γ ⊢ f : A +└─ D_k :: Γ ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ varDecl x_c bool M_c (ifThenElse x_c M_t M_f M_k) ⇐ ⟦A⟧ & d [varDecl] +├─ ⟦D_c⟧⇐ₚ :: ⟦Γ⟧ ⊢ M_c ⇐ bool & d +└─ ⟦Γ⟧, x_c:bool ⊢ ifThenElse x_c M_t M_f M_k ⇐ ⟦A⟧ & d [ifThenElse] + ├─ ⟦Γ⟧, x_c:bool ⊢ x_c ⇐ bool [subsumption] + │ ├─ ⟦Γ⟧, x_c:bool ⊢ x_c ⇒ bool [var] + │ └─ bool ≤ bool ↦ id + ├─ ⟦D_t⟧⇐ₚ :: ⟦Γ⟧, x_c:bool ⊢ M_t ⇐ ⟦A⟧ & d + ├─ ⟦D_f⟧⇐ₚ :: ⟦Γ⟧, x_c:bool ⊢ M_f ⇐ ⟦A⟧ & d + └─ ⟦D_k⟧⇐ₚ* :: ⟦Γ⟧, x_c:bool ⊢ M_k ⇐ ⟦A⟧ & d +``` +-/ +partial def checkProducerIf (md : Md) (cond thn : StmtExprMd) (els : Option StmtExprMd) + (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + let M_c ← checkProducer cond [] .TBool grade + let x_c ← freshVar "cond" + let body ← extendEnv x_c .TBool do + let M_t ← checkProducer thn [] retTy grade + let M_f ← match els with + | some e => checkProducer e [] retTy grade + | none => pure .skip + let M_k ← checkProducers rest retTy grade + pure (.ifThenElse md (.var md x_c) M_t M_f M_k) + pure (.varDecl md x_c .TBool M_c body) + +/-- ⟦·⟧⇐ₚ (while): +``` +D :: Γ ⊢ (while c do body); k : A [while] +├─ D_c :: Γ ⊢ c : bool +├─ D_b :: Γ ⊢ body : A +└─ D_k :: Γ ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ varDecl x_c bool M_c (whileLoop x_c M_b M_k) 
⇐ ⟦A⟧ & d [varDecl] +├─ ⟦D_c⟧⇐ₚ :: ⟦Γ⟧ ⊢ M_c ⇐ bool & d +└─ ⟦Γ⟧, x_c:bool ⊢ whileLoop x_c M_b M_k ⇐ ⟦A⟧ & d [whileLoop] + ├─ ⟦Γ⟧, x_c:bool ⊢ x_c ⇐ bool [subsumption] + │ ├─ ⟦Γ⟧, x_c:bool ⊢ x_c ⇒ bool [var] + │ └─ bool ≤ bool ↦ id + ├─ ⟦D_b⟧⇐ₚ :: ⟦Γ⟧, x_c:bool ⊢ M_b ⇐ ⟦A⟧ & d + └─ ⟦D_k⟧⇐ₚ* :: ⟦Γ⟧, x_c:bool ⊢ M_k ⇐ ⟦A⟧ & d +``` +NOTE(review): `x_c` is bound to `M_c` once, ahead of the loop, and this clause adds nothing +that re-evaluates the condition after `M_b`; the emitted `whileLoop` tests that single binding. +Confirm this is the intended loop semantics. +-/ +partial def checkProducerWhile (md : Md) (cond loopBody : StmtExprMd) + (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + let M_c ← checkProducer cond [] .TBool grade + let x_c ← freshVar "cond" + let body ← extendEnv x_c .TBool do + let M_b ← checkProducer loopBody [] retTy grade + let M_k ← checkProducers rest retTy grade + pure (.whileLoop md (.var md x_c) M_b M_k) + pure (.varDecl md x_c .TBool M_c body) + +/-- ⟦·⟧⇐ₚ (varDecl): +``` +D :: Γ ⊢ (var x:T := e); k : A [varDecl] +├─ D_e :: Γ ⊢ e : T +└─ D_k :: Γ, x:T ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ varDecl x ⟦T⟧ M_e M_k ⇐ ⟦A⟧ & d [varDecl] +├─ ⟦D_e⟧⇐ₚ :: ⟦Γ⟧ ⊢ M_e ⇐ ⟦T⟧ & d +└─ ⟦D_k⟧⇐ₚ* :: ⟦Γ⟧, x:⟦T⟧ ⊢ M_k ⇐ ⟦A⟧ & d +``` +With no initializer, `M_e` is `produce` of a deterministic hole (`.Hole true none`) checked +at the declared type. +-/ +partial def checkProducerVarDecl (md : Md) (nameId : Identifier) (typeMd : HighTypeMd) + (initOpt : Option StmtExprMd) (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + let M_e ← match initOpt with + | some init => checkProducer init [] typeMd.val grade + | none => do + let v ← checkValue (mkLaurel md (.Hole true none)) typeMd.val + pure (.produce md v) + let body ← extendEnv nameId.text typeMd.val do + checkProducers rest retTy grade + pure (.varDecl md nameId.text (eraseType typeMd.val) M_e body) + +/-- ⟦·⟧⇐ₚ (assert): +``` +D :: Γ ⊢ (assert c); k : A [assert] +├─ D_c :: Γ ⊢ c : bool +└─ D_k :: Γ ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ varDecl x_c bool M_c (assert x_c M_k) ⇐ ⟦A⟧ & d [varDecl] +├─ ⟦D_c⟧⇐ₚ :: ⟦Γ⟧ ⊢ M_c ⇐ bool & d +└─ ⟦Γ⟧, x_c:bool ⊢ assert x_c M_k ⇐ ⟦A⟧ & d [assert] + ├─ ⟦Γ⟧, x_c:bool ⊢ x_c ⇐ bool [subsumption] + │ ├─ ⟦Γ⟧, x_c:bool ⊢ x_c ⇒ bool [var] + │ └─ bool ≤ bool ↦ id + └─
⟦D_k⟧⇐ₚ* :: ⟦Γ⟧, x_c:bool ⊢ M_k ⇐ ⟦A⟧ & d +``` +-/ +partial def checkProducerAssert (md : Md) (cond : StmtExprMd) + (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + let M_c ← checkProducer cond [] .TBool grade + let x_c ← freshVar "cond" + let body ← extendEnv x_c .TBool do + let M_k ← checkProducers rest retTy grade + pure (.assert md (.var md x_c) M_k) + pure (.varDecl md x_c .TBool M_c body) + +/-- ⟦·⟧⇐ₚ (assume): +``` +D :: Γ ⊢ (assume c); k : A [assume] +├─ D_c :: Γ ⊢ c : bool +└─ D_k :: Γ ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ varDecl x_c bool M_c (assume x_c M_k) ⇐ ⟦A⟧ & d [varDecl] +├─ ⟦D_c⟧⇐ₚ :: ⟦Γ⟧ ⊢ M_c ⇐ bool & d +└─ ⟦Γ⟧, x_c:bool ⊢ assume x_c M_k ⇐ ⟦A⟧ & d [assume] + ├─ ⟦Γ⟧, x_c:bool ⊢ x_c ⇐ bool [subsumption] + │ ├─ ⟦Γ⟧, x_c:bool ⊢ x_c ⇒ bool [var] + │ └─ bool ≤ bool ↦ id + └─ ⟦D_k⟧⇐ₚ* :: ⟦Γ⟧, x_c:bool ⊢ M_k ⇐ ⟦A⟧ & d +``` +-/ +partial def checkProducerAssume (md : Md) (cond : StmtExprMd) + (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + let M_c ← checkProducer cond [] .TBool grade + let x_c ← freshVar "cond" + let body ← extendEnv x_c .TBool do + let M_k ← checkProducers rest retTy grade + pure (.assume md (.var md x_c) M_k) + pure (.varDecl md x_c .TBool M_c body) + +/-- Shared core of the call clauses: elaborates a call to `callee` under the ambient `grade`. + Looks up the callee's grade in `procGrades` (a name with no entry is treated as `pure`) and + takes the left residual of that grade against `grade`. When no residual exists the clause + fails silently: grade inference relies on that failure to move on to the next trial grade, + so it is an expected outcome and not a diagnostic. Otherwise the arguments are bound with + `bindArgs` and `body` receives the call's result value together with the residual grade. + A `pure` callee becomes a `staticCall` value; any other grade goes through `mkGradedCall` + with the outputs reported by `lookupProcOutputs`. -/ +partial def elaborateCall (md : Md) (callee : Identifier) (args : List StmtExprMd) + (grade : Grade) (body : FGLValue → Grade → ElabM FGLProducer) : ElabM FGLProducer := do + let callGrade := (← read).procGrades[callee.text]?.getD .pure + let some residual := Grade.leftResidual callGrade grade | failure + let sig ← lookupFuncSig callee.text + bindArgs md args sig.params grade fun boundVars => do + match callGrade with + | .pure => + let rv := FGLValue.staticCall md callee.text boundVars + body rv residual + | _ => + let declaredOutputs ← lookupProcOutputs callee.text + mkGradedCall md callee.text boundVars
declaredOutputs callGrade fun rv => + body rv residual + +/-- ⟦·⟧⇐ₚ (bare call, discards return value): +``` +D :: Γ ⊢ g(e₁,…,eₙ); k : A [call] +├─ (g : (A₁,...,Aₙ) → B) ∈ Γ +├─ Dᵢ :: Γ ⊢ eᵢ : Aᵢ (for each i) +└─ D_k :: Γ ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ varDecl x₁ ⟦A₁⟧ M₁ (...(varDecl xₙ ⟦Aₙ⟧ Mₙ (procedureCall g (pre ++ [x₁,...,xₙ]) outs M_k))) ⇐ ⟦A⟧ & d +├─ ⟦D₁⟧⇐ₚ :: ⟦Γ⟧ ⊢ M₁ ⇐ ⟦A₁⟧ & d +├─ ... [varDecl] +├─ ⟦Dₙ⟧⇐ₚ :: ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ₋₁:⟦Aₙ₋₁⟧ ⊢ Mₙ ⇐ ⟦Aₙ⟧ & d +└─ ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧ ⊢ procedureCall g (pre ++ [x₁,...,xₙ]) outs M_k ⇐ ⟦A⟧ & d [producerSubsumption] + ├─ (g : (⟦A₁⟧,...,⟦Aₙ⟧) → ⟦B⟧ & d') ∈ ⟦Γ⟧ + ├─ ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧ ⊢ xᵢ ⇐ ⟦Aᵢ⟧ [subsumption] + │ ├─ ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧ ⊢ xᵢ ⇒ ⟦Aᵢ⟧ [var] + │ └─ ⟦Aᵢ⟧ ≤ ⟦Aᵢ⟧ ↦ id + ├─ d' ≤ d ↦ (pre, outs) + └─ ⟦D_k⟧⇐ₚ* :: ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧, outs ⊢ M_k ⇐ ⟦A⟧ & (d'\d) +``` +-/ +partial def checkProducerStaticCall (md : Md) (callee : Identifier) (args : List StmtExprMd) + (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + elaborateCall md callee args grade fun rv residual => do + match rest with + | [] => + let sig ← lookupFuncSig callee.text + pure (.produce md (applySubtype rv (eraseType sig.returnType) (eraseType retTy))) + | _ => checkProducers rest retTy residual + +/-- ⟦·⟧⇐ₚ (block): +``` +D :: Γ ⊢ {body}_l; k : A [block] +├─ D_b :: Γ, l ⊢ body : A +└─ D_k :: Γ ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ labeledBlock l M_b M_k ⇐ ⟦A⟧ & d [labeledBlock] +├─ ⟦D_b⟧⇐ₚ :: ⟦Γ⟧, l ⊢ M_b ⇐ ⟦A⟧ & d +└─ ⟦D_k⟧⇐ₚ* :: ⟦Γ⟧ ⊢ M_k ⇐ ⟦A⟧ & d +``` +Unlabeled blocks are flattened into the enclosing scope. 
+-/ +partial def checkProducerBlock (md : Md) (stmts : List StmtExprMd) (label : Option String) + (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + match label with + | some l => + let M_b ← checkProducers stmts retTy grade + let M_k ← checkProducers rest retTy grade + pure (.labeledBlock md l M_b M_k) + | none => checkProducers (stmts ++ rest) retTy grade + +/-- ⟦·⟧⇐ₚ: Producer checking. Entry point of the translation. + Dispatches on statement form to clause helpers. `stmt` is the statement being checked and + `rest` is its continuation (the statements that follow it). + + Arms handled inline rather than by a helper: + - `.While`: only the condition and the body are forwarded; the other two fields of the node + (bound here as `_invs` and `_dec`) are not used. + - `.Exit`: yields `exit` and does not elaborate `rest`. + - `.Assign`: exactly one target is supported; any other target list fails. + - `.New` in statement position fails (allocation is handled by the assignment clause). + - `.Hole`: records a fresh `havoc` name in `usedHoles` and emits a graded call at `.proc`. + - any other form is checked as a value against `retTy` and must be the last statement; + a non-empty `rest` fails. + + NOTE(review): the `.Hole` arm passes `.proc` to `mkGradedCall` without consulting + `Grade.leftResidual` against the ambient `grade`, unlike `elaborateCall`; confirm that is + intended. -/ +partial def checkProducer (stmt : StmtExprMd) (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + let md := stmt.md + match stmt.val with + | .IfThenElse cond thn els => checkProducerIf md cond thn els rest retTy grade + | .While cond _invs _dec loopBody => checkProducerWhile md cond loopBody rest retTy grade + | .Exit target => pure (.exit md target) + | .LocalVariable nameId typeMd initOpt => checkProducerVarDecl md nameId typeMd initOpt rest retTy grade + | .Assert cond => checkProducerAssert md cond rest retTy grade + | .Assume cond => checkProducerAssume md cond rest retTy grade + | .Assign targets value => match targets with + | [target] => checkAssign target value rest retTy grade + | _ => failure + | .StaticCall callee args => checkProducerStaticCall md callee args rest retTy grade + | .Block stmts label => checkProducerBlock md stmts label rest retTy grade + | .New _ => failure + | .Hole deterministic _ => do + let hv ← freshVar "havoc" + modify fun s => { s with usedHoles := s.usedHoles ++ [(hv, deterministic, retTy)] } + -- A deterministic hole is a pure function of the procedure's inputs, so it is + -- declared with those inputs (see emission below) and must be applied to them + -- here — same as the value-judgment `.Hole` case. A nondeterministic hole + -- (havoc) is declared with no inputs and called with none.
+ let env ← read + let args := if deterministic then env.procInputs.map (fun (name, _) => FGLValue.var md name) else [] + let declaredOutputs := [("result", retTy)] + mkGradedCall md hv args declaredOutputs .proc fun rv => do + let M_k ← checkProducers rest retTy grade + match rest with + | [] => pure (.produce md rv) + | _ => pure M_k + | _ => do + let v ← checkValue stmt retTy + match rest with + | [] => pure (.produce md v) + | _ => failure + +/-- Bind a list of arguments as producers via nested varDecls. + Each arg is checked as a producer, bound to a fresh var, and the + continuation receives the list of bound values. -/ +partial def bindArgs (md : Md) (args : List StmtExprMd) (params : List (String × HighType)) + (grade : Grade) (cont : List FGLValue → ElabM FGLProducer) : ElabM FGLProducer := do + match args, params with + | [], _ => cont [] + | arg :: restArgs, (_, pty) :: restParams => do + let M_arg ← checkProducer arg [] pty grade + let x_arg ← freshVar "arg" + let body ← extendEnv x_arg pty do + bindArgs md restArgs restParams grade fun restVars => + cont (.var md x_arg :: restVars) + pure (.varDecl md x_arg (eraseType pty) M_arg body) + | _ :: _, [] => failure + +/-- ⟦·⟧⇐ₚ (field write): +``` +D :: Γ ⊢ (obj.f := v); k : A [fieldWrite] +├─ D_obj :: Γ ⊢ obj : C (C discovered by synthesis on obj) +├─ fieldType(C, f) = T +├─ D_v :: Γ ⊢ v : T +└─ D_k :: Γ ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ varDecl x_obj ⟦C⟧ M_obj (varDecl x_v ⟦T⟧ M_v (varDecl h' Heap M_update M_k)) ⇐ ⟦A⟧ & d [varDecl] +├─ ⟦D_obj⟧⇐ₚ :: ⟦Γ⟧ ⊢ M_obj ⇐ ⟦C⟧ & d +└─ ⟦Γ⟧, x_obj:⟦C⟧ ⊢ varDecl x_v ⟦T⟧ M_v (varDecl h' Heap M_update M_k) ⇐ ⟦A⟧ & d [varDecl] + ├─ ⟦D_v⟧⇐ₚ :: ⟦Γ⟧, x_obj ⊢ M_v ⇐ ⟦T⟧ & d + └─ ⟦Γ⟧, x_obj, x_v ⊢ varDecl h' Heap M_update M_k ⇐ ⟦A⟧ & d [varDecl] + ├─ ⟦Γ⟧, x_obj, x_v ⊢ produce (functionCall updateField [$heap, x_obj, $field.C.f, functionCall box_T [x_v]]) ⇐ Heap & d [produce] + │ └─ ⟦Γ⟧, x_obj, x_v ⊢ functionCall updateField [$heap, x_obj, $field.C.f, functionCall box_T [x_v]] ⇐ 
Heap [subsumption] + │ ├─ ⟦Γ⟧, x_obj, x_v ⊢ functionCall updateField [$heap, x_obj, $field.C.f, functionCall box_T [x_v]] ⇒ Heap [functionCall] + │ │ ├─ ⟦Γ⟧, x_obj, x_v ⊢ $heap ⇐ Heap [subsumption] + │ │ │ ├─ ⟦Γ⟧, x_obj, x_v ⊢ $heap ⇒ Heap [var] + │ │ │ └─ Heap ≤ Heap ↦ id + │ │ ├─ ⟦Γ⟧, x_obj, x_v ⊢ x_obj ⇐ Composite [subsumption] + │ │ │ ├─ ⟦Γ⟧, x_obj, x_v ⊢ x_obj ⇒ Composite [var] + │ │ │ └─ Composite ≤ Composite ↦ id + │ │ ├─ ⟦Γ⟧, x_obj, x_v ⊢ functionCall $field.C.f [] ⇐ Field [subsumption] + │ │ │ ├─ ⟦Γ⟧, x_obj, x_v ⊢ functionCall $field.C.f [] ⇒ Field [functionCall] + │ │ │ └─ Field ≤ Field ↦ id + │ │ └─ ⟦Γ⟧, x_obj, x_v ⊢ functionCall box_T [x_v] ⇐ Box [subsumption] + │ │ ├─ ⟦Γ⟧, x_obj, x_v ⊢ functionCall box_T [x_v] ⇒ Box [functionCall] + │ │ │ └─ ⟦Γ⟧, x_obj, x_v ⊢ x_v ⇐ ⟦T⟧ [subsumption] + │ │ │ ├─ ⟦Γ⟧, x_obj, x_v ⊢ x_v ⇒ ⟦T⟧ [var] + │ │ │ └─ ⟦T⟧ ≤ ⟦T⟧ ↦ id + │ │ └─ Box ≤ Box ↦ id + │ └─ Heap ≤ Heap ↦ id + └─ ⟦D_k⟧⇐ₚ* :: ⟦Γ⟧, x_obj, x_v, h':Heap ⊢ M_k ⇐ ⟦A⟧ & d +``` +The clause fails unless `.heap` has a left residual against `grade`, and fails when no heap +variable is current. `obj` is first synthesized as a value only to discover its type, then +elaborated again as a producer. + +NOTE(review): when the synthesized type of `obj` is not `.UserDefined`, the final match arm +continues with `rest` and emits nothing for the write itself; confirm that dropping the +assignment in that case is intended. +-/ +partial def checkAssignFieldWrite (md : Md) (obj : StmtExprMd) (field : Identifier) + (value : StmtExprMd) (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + guard (Grade.leftResidual .heap grade |>.isSome) + let (_, objHighTy) ← synthValue obj + match objHighTy with + | .UserDefined id => + let owner := id.text + let fieldTy ← lookupFieldType owner field.text + let M_obj ← checkProducer obj [] objHighTy grade + let x_obj ← freshVar "obj" + let qualifiedName := "$field." ++ owner ++ "."
++ field.text + recordBoxUse fieldTy + let body_obj ← extendEnv x_obj objHighTy do + let M_v ← checkProducer value [] fieldTy grade + let x_v ← freshVar "val" + let body_v ← extendEnv x_v fieldTy do + match (← get).heapVar with + | some hv => + let boxed := FGLValue.staticCall md (boxConstructorName fieldTy) [.var md x_v] + let newHeap := FGLValue.staticCall md "updateField" [.var md hv, .var md x_obj, .staticCall md qualifiedName [], boxed] + let freshH ← freshVar "heap" + modify fun s => { s with heapVar := some freshH } + let body_h ← extendEnv freshH .THeap do + checkProducers rest retTy grade + pure (.varDecl md freshH (.TCore "Heap") (.produce md newHeap) body_h) + | none => failure + pure (.varDecl md x_v (eraseType fieldTy) M_v body_v) + pure (.varDecl md x_obj (.TCore "Composite") M_obj body_obj) + | _ => checkProducers rest retTy grade + +/-- Dispatches on LHS to get assignee, then on RHS form. -/ +partial def checkAssign (target value : StmtExprMd) (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + let md := target.md + match target.val with + | .FieldSelect obj field => checkAssignFieldWrite md obj field value rest retTy grade + | .Identifier id => + let .variable targetTy := (← lookupEnv id.text) | failure + match value.val with + | .StaticCall callee args => checkAssignStaticCall md id.text targetTy callee args rest retTy grade + | .New classId => checkAssignNew md id.text targetTy classId rest retTy grade + | _ => checkAssignVar md id.text targetTy value rest retTy grade + | _ => failure + +/-- ⟦·⟧⇐ₚ (assign, generic RHS): +``` +D :: Γ ⊢ (x := e); k : A [assign] +├─ D_e :: Γ ⊢ e : Γ(x) +└─ D_k :: Γ ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ assign x M M_k ⇐ ⟦A⟧ & d [assign] +├─ ⟦D_e⟧⇐ₚ :: ⟦Γ⟧ ⊢ M ⇐ ⟦Γ(x)⟧ & d +└─ ⟦D_k⟧⇐ₚ* :: ⟦Γ⟧ ⊢ M_k ⇐ ⟦A⟧ & d +``` +-/ +partial def checkAssignVar (md : Md) (targetName : String) (targetTy : HighType) + (value : StmtExprMd) (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM 
FGLProducer := do + let M ← checkProducer value [] targetTy grade + let M_k ← checkProducers rest retTy grade + pure (.assign md (.var md targetName) M M_k) + +/-- ⟦·⟧⇐ₚ (assign + call): +``` +D :: Γ ⊢ (x := f(e₁,...,eₙ)); k : A [assign] +├─ D_e :: Γ ⊢ f(e₁,...,eₙ) : Γ(x) [call] +│ ├─ (f : (A₁,...,Aₙ) → B) ∈ Γ +│ └─ Dᵢ :: Γ ⊢ eᵢ : Aᵢ (for i = 1,...,n) +└─ D_k :: Γ ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ varDecl x₁ ⟦A₁⟧ M₁ (...(varDecl xₙ ⟦Aₙ⟧ Mₙ (procedureCall f (pre ++ [x₁,...,xₙ]) outs (assign x (produce c(rv)) M_k)))) ⇐ ⟦A⟧ & d [varDecl] +├─ ⟦D₁⟧⇐ₚ :: ⟦Γ⟧ ⊢ M₁ ⇐ ⟦A₁⟧ & d +├─ ... [varDecl] +├─ ⟦Dₙ⟧⇐ₚ :: ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ₋₁:⟦Aₙ₋₁⟧ ⊢ Mₙ ⇐ ⟦Aₙ⟧ & d +└─ ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧ ⊢ procedureCall f (pre ++ [x₁,...,xₙ]) outs (assign x (produce c(rv)) M_k) ⇐ ⟦A⟧ & d [producerSubsumption] + ├─ (f : (⟦A₁⟧,...,⟦Aₙ⟧) → ⟦B⟧ & d') ∈ ⟦Γ⟧ + ├─ ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧ ⊢ xᵢ ⇐ ⟦Aᵢ⟧ [subsumption] + │ ├─ ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧ ⊢ xᵢ ⇒ ⟦Aᵢ⟧ [var] + │ └─ ⟦Aᵢ⟧ ≤ ⟦Aᵢ⟧ ↦ id + ├─ d' ≤ d ↦ (pre, outs) where (rv : ⟦B⟧) ∈ outs + └─ ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧, outs ⊢ assign x (produce c(rv)) M_k ⇐ ⟦A⟧ & (d'\d) [assign] + ├─ ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧, outs ⊢ produce c(rv) ⇐ ⟦Γ(x)⟧ & (d'\d) [produce] + │ └─ ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧, outs ⊢ c(rv) ⇐ ⟦Γ(x)⟧ [subsumption] + │ ├─ ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧, outs ⊢ rv ⇒ ⟦B⟧ [var] + │ └─ ⟦B⟧ ≤ ⟦Γ(x)⟧ ↦ c + └─ ⟦D_k⟧⇐ₚ* :: ⟦Γ⟧, x₁:⟦A₁⟧,...,xₙ:⟦Aₙ⟧, outs ⊢ M_k ⇐ ⟦A⟧ & (d'\d) +``` +The callee's signature is read up front so the call result can be coerced from the declared +return type to the target's type; `rest` is checked at the residual grade handed back by +`elaborateCall`. +-/ +partial def checkAssignStaticCall (md : Md) (targetName : String) (targetTy : HighType) + (callee : Identifier) (args : List StmtExprMd) + (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do
+ let sig ← lookupFuncSig callee.text + elaborateCall md callee args grade fun rv residual => do + let coerced := applySubtype rv (eraseType sig.returnType) (eraseType targetTy) + let M_k ← checkProducers rest retTy residual + pure (.assign md (.var md targetName) (.produce md coerced) M_k) + +/-- ⟦·⟧⇐ₚ (assign + new): +``` +D :: Γ ⊢ (x := new C); k : A [assign] +├─ D_e :: Γ ⊢ new C : Γ(x) [new] +│ └─ C is a class ∈ Γ +└─ D_k :: Γ ⊢ k : A + + ↦ + +⟦D⟧⇐ₚ :: ⟦Γ⟧ ⊢ varDecl h' Heap (produce (functionCall increment [$heap])) (assign x (produce c(functionCall MkComposite [functionCall Heap..nextReference! [$heap], functionCall C_TypeTag []])) M_k) ⇐ ⟦A⟧ & d [varDecl] +├─ ⟦Γ⟧ ⊢ produce (functionCall increment [$heap]) ⇐ Heap & d [produce] +│ └─ ⟦Γ⟧ ⊢ functionCall increment [$heap] ⇐ Heap [subsumption] +│ ├─ ⟦Γ⟧ ⊢ functionCall increment [$heap] ⇒ Heap [functionCall] +│ │ └─ ⟦Γ⟧ ⊢ $heap ⇐ Heap [subsumption] +│ │ ├─ ⟦Γ⟧ ⊢ $heap ⇒ Heap [var] +│ │ └─ Heap ≤ Heap ↦ id +│ └─ Heap ≤ Heap ↦ id +└─ ⟦Γ⟧, h':Heap ⊢ assign x (produce c(functionCall MkComposite [functionCall Heap..nextReference! [$heap], functionCall C_TypeTag []])) M_k ⇐ ⟦A⟧ & d [assign] + ├─ ⟦Γ⟧, h':Heap ⊢ produce c(functionCall MkComposite [...]) ⇐ ⟦Γ(x)⟧ & d [produce] + │ └─ ⟦Γ⟧, h':Heap ⊢ c(functionCall MkComposite [...]) ⇐ ⟦Γ(x)⟧ [subsumption] + │ ├─ ⟦Γ⟧, h':Heap ⊢ functionCall MkComposite [functionCall Heap..nextReference! [$heap], functionCall C_TypeTag []] ⇒ Composite [functionCall] + │ │ ├─ ⟦Γ⟧, h':Heap ⊢ functionCall Heap..nextReference! [$heap] ⇐ int [subsumption] + │ │ │ ├─ ⟦Γ⟧, h':Heap ⊢ functionCall Heap..nextReference!
[$heap] ⇒ int [functionCall] + │ │ │ │ └─ ⟦Γ⟧, h':Heap ⊢ $heap ⇐ Heap [subsumption] + │ │ │ │ ├─ ⟦Γ⟧, h':Heap ⊢ $heap ⇒ Heap [var] + │ │ │ │ └─ Heap ≤ Heap ↦ id + │ │ │ └─ int ≤ int ↦ id + │ │ └─ ⟦Γ⟧, h':Heap ⊢ functionCall C_TypeTag [] ⇐ TypeTag [subsumption] + │ │ ├─ ⟦Γ⟧, h':Heap ⊢ functionCall C_TypeTag [] ⇒ TypeTag [functionCall] + │ │ └─ TypeTag ≤ TypeTag ↦ id + │ └─ Composite ≤ ⟦Γ(x)⟧ ↦ c + └─ ⟦D_k⟧⇐ₚ* :: ⟦Γ⟧, h':Heap ⊢ M_k ⇐ ⟦A⟧ & d +``` +-/ +partial def checkAssignNew (md : Md) (targetName : String) (targetTy : HighType) + (classId : Identifier) (rest : List StmtExprMd) (retTy : HighType) (grade : Grade) : ElabM FGLProducer := do + match (← get).heapVar with + | some hv => + let newHeap := FGLValue.staticCall md "increment" [.var md hv] + let ref := FGLValue.staticCall md "Heap..nextReference!" [.var md hv] + let obj := FGLValue.staticCall md "MkComposite" [ref, .staticCall md (classId.text ++ "_TypeTag") []] + let coerced := applySubtype obj (.TCore "Composite") (eraseType targetTy) + let freshH ← freshVar "heap" + modify fun s => { s with heapVar := some freshH } + let M_k ← extendEnv freshH .THeap do checkProducers rest retTy grade + pure (.varDecl md freshH (.TCore "Heap") (.produce md newHeap) + (.assign md (.var md targetName) (.produce md coerced) M_k)) + | none => failure + +end + +/-! ## Grade Inference + +Grade inference is coinductive over the call graph. For each procedure, +try elaboration at successively higher grades until one succeeds. When a +callee's grade exceeds the trial grade, the left residual is undefined, +elaboration fails (returns `none`), and the next grade is tried. The +finite lattice guarantees convergence. -/ + +/-- Try elaborating a procedure body at each grade in order. Returns the + first grade that succeeds, or `heapErr` as fallback. 
-/ +partial def tryGrades (callee : String) (env : ElabEnv) (body : StmtExprMd) + (retTy : HighType) (grades : List Grade) : Option Grade := + match grades with + | [] => some .heapErr + | g :: rest => + /- Each trial starts from a fresh state; a heap variable is seeded only for the + heap-carrying grades. -/ + let st : ElabState := { + freshCounter := 0 + heapVar := if g == .heap || g == .heapErr then some "$heap" else none } + /- The trial grade is recorded under `callee` in `procGrades` before the body is checked. -/ + let trialEnv := { env with procGrades := env.procGrades.insert callee g } + match (checkProducer body [] retTy g).run trialEnv |>.run st with + | some _ => some g + | none => tryGrades callee env body retTy rest + +/-! ## Projection (Destination Passing Style) + +Projection reverses elaboration: GFGL derivations → Laurel derivations. +Uses a writer monad that accumulates declarations (hoisted to procedure top). + +``` +⟦D⟧ₓ⁻¹ : (⟦Γ⟧ ⊢ M ⇐ ⟦A⟧ & d) → ∃e⃗. (Γ, x : A ⊢ e⃗ : TVoid) +``` +-/ + +/-- Writer-style carrier for projection: a result paired with the list of declarations + emitted so far. -/ +structure ProjM (α : Type) where + /-- The result together with the accumulated declarations. -/ + run : α × List StmtExprMd + +/-- `pure` emits no declarations; `bind` runs both steps and concatenates their declarations + in order. -/ +instance : Monad ProjM where + pure a := ⟨(a, [])⟩ + bind ma f := let (a, d1) := ma.run; let (b, d2) := (f a).run; ⟨(b, d1 ++ d2)⟩ + +/-- Emit a single declaration into the accumulated list. -/ +def projDecl (decl : StmtExprMd) : ProjM Unit := ⟨((), [decl])⟩ + +/-- Project an FGL value back to a Laurel expression. Literals and variables map to the + corresponding Laurel nodes; each `from*` injection becomes a `StaticCall` to the + constructor of the matching name (`from_int`, `from_str`, …). -/ +def projectValue : FGLValue → StmtExprMd + | .litInt md n => mkLaurel md (.LiteralInt n) + | .litBool md b => mkLaurel md (.LiteralBool b) + | .litString md s => mkLaurel md (.LiteralString s) + | .var md name => mkLaurel md (.Identifier (Identifier.mk name none)) + | .fromInt md v => mkLaurel md (.StaticCall (Identifier.mk "from_int" none) [projectValue v]) + | .fromStr md v => mkLaurel md (.StaticCall (Identifier.mk "from_str" none) [projectValue v]) + | .fromBool md v => mkLaurel md (.StaticCall (Identifier.mk "from_bool" none) [projectValue v]) + | .fromFloat md v => mkLaurel md (.StaticCall (Identifier.mk "from_float" none) [projectValue v]) + | .fromComposite md v => mkLaurel md (.StaticCall (Identifier.mk "from_Composite" none) [projectValue v]) + | .fromListAny md v => mkLaurel md (.StaticCall (Identifier.mk "from_ListAny" none) [projectValue v]) + | .fromDictStrAny md v => mkLaurel md
(.StaticCall (Identifier.mk "from_DictStrAny" none) [projectValue v]) + | .fromNone md => mkLaurel md (.StaticCall (Identifier.mk "from_None" none) []) + | .fieldAccess md obj f => mkLaurel md (.FieldSelect (projectValue obj) (Identifier.mk f none)) + | .staticCall md name args => mkLaurel md (.StaticCall (Identifier.mk name none) (args.map projectValue)) + +mutual + +/-- Destination-passing projection. +``` +⟦·⟧ₓ⁻¹ : (⟦Γ⟧ ⊢ M ⇔ ⟦A⟧ & d) → ∃e⃗. (Γ, x : A ⊢ e⃗ : TVoid) +⟦·⟧⁻¹ : (⟦Γ⟧ ⊢ V ⇔ ⟦A⟧) → ∃e. (Γ ⊢ e : A) +``` +Dispatches to per-constructor helpers. -/ +partial def proj (dest : Option StmtExprMd) : FGLProducer → ProjM (List StmtExprMd) + | .produce md v => projProduce dest md v + | .varDecl md name ty init body => projVarDecl dest md name ty init body + | .assign md target val body => projAssign dest md target val body + | .ifThenElse md cond thn els after => projIfThenElse dest md cond thn els after + | .whileLoop md cond body after => projWhileLoop dest md cond body after + | .procedureCall md callee args outputs body => projProcedureCall dest md callee args outputs body + | .assert md cond body => projAssert dest md cond body + | .assume md cond body => projAssume dest md cond body + | .labeledBlock md label body after => projLabeledBlock dest md label body after + | .exit md label => projExit md label + | .skip => projSkip + +/-- projProduce: +``` +D :: ⟦Γ⟧ ⊢ produce V ⇐ ⟦A⟧ & d [produce] +└─ D_V :: ⟦Γ⟧ ⊢ V ⇐ ⟦A⟧ + + ↦ (destination x : A present) + +⟦D⟧ₓ⁻¹ :: Γ, x : A ⊢ (x := e_V); skip : TVoid [assign] +├─ ⟦D_V⟧⁻¹ :: Γ ⊢ e_V : A +└─ Γ ⊢ skip : TVoid [skip] +``` +With no destination (a `TVoid` command — the body, or a control-flow path with +no `x : A` in context), the produced value has nowhere to go and projects to the +empty statement list. 
-/ +partial def projProduce (dest : Option StmtExprMd) (md : Md) (v : FGLValue) : ProjM (List StmtExprMd) := + match dest with + | some d => pure [mkLaurel md (.Assign [d] (projectValue v))] + | none => pure [] + +/-- projVarDecl: +``` +D :: ⟦Γ⟧ ⊢ varDecl y T M N ⇐ ⟦A⟧ & d +├─ D_M :: ⟦Γ⟧ ⊢ M ⇐ T & d +└─ D_N :: ⟦Γ⟧, y:T ⊢ N ⇐ ⟦A⟧ & d + + ↦ + +⟦D⟧ₓ⁻¹ :: Γ, x : A ⊢ (var y : T; e⃗_M; e⃗_N) : TVoid [varDecl] +├─ ⟦D_M⟧ᵧ⁻¹ :: Γ, y : T ⊢ e⃗_M : TVoid +└─ ⟦D_N⟧ₓ⁻¹ :: Γ, x : A, y : T ⊢ e⃗_N : TVoid +``` +-/ +partial def projVarDecl (dest : Option StmtExprMd) (md : Md) (name : String) (ty : LowType) + (init : FGLProducer) (body : FGLProducer) : ProjM (List StmtExprMd) := do + let nameExpr := mkLaurel md (.Identifier (Identifier.mk name none)) + let decl := mkLaurel md (.LocalVariable (Identifier.mk name none) (mkHighTypeMd md (liftType ty)) none) + projDecl decl + let initStmts ← proj (some nameExpr) init + let bodyStmts ← proj dest body + pure (initStmts ++ bodyStmts) + +/-- projAssign: +``` +D :: ⟦Γ⟧ ⊢ assign y M K ⇐ ⟦A⟧ & d +├─ D_M :: ⟦Γ⟧ ⊢ M ⇐ ⟦Γ(y)⟧ & d +└─ D_K :: ⟦Γ⟧ ⊢ K ⇐ ⟦A⟧ & d + + ↦ + +⟦D⟧ₓ⁻¹ :: Γ, x : A ⊢ (e⃗_M; e⃗_K) : TVoid [assign] +├─ ⟦D_M⟧ᵧ⁻¹ :: Γ, y : Γ(y) ⊢ e⃗_M : TVoid +└─ ⟦D_K⟧ₓ⁻¹ :: Γ, x : A ⊢ e⃗_K : TVoid +``` +-/ +partial def projAssign (dest : Option StmtExprMd) (_md : Md) (target : FGLValue) + (val : FGLProducer) (body : FGLProducer) : ProjM (List StmtExprMd) := do + let valStmts ← proj (some (projectValue target)) val + let bodyStmts ← proj dest body + pure (valStmts ++ bodyStmts) + +/-- projIfThenElse: +``` +D :: ⟦Γ⟧ ⊢ ifThenElse V M N K ⇐ ⟦A⟧ & d +├─ D_V :: ⟦Γ⟧ ⊢ V ⇐ bool +├─ D_M :: ⟦Γ⟧ ⊢ M ⇐ ⟦A⟧ & d +├─ D_N :: ⟦Γ⟧ ⊢ N ⇐ ⟦A⟧ & d +└─ D_K :: ⟦Γ⟧ ⊢ K ⇐ ⟦A⟧ & d + + ↦ + +⟦D⟧ₓ⁻¹ :: Γ, x : A ⊢ (if e_V then {e⃗_M} else {e⃗_N}); e⃗_K : TVoid [if] +├─ ⟦D_V⟧⁻¹ :: Γ ⊢ e_V : bool +├─ ⟦D_M⟧ₓ⁻¹ :: Γ, x : A ⊢ e⃗_M : TVoid +├─ ⟦D_N⟧ₓ⁻¹ :: Γ, x : A ⊢ e⃗_N : TVoid +└─ ⟦D_K⟧ₓ⁻¹ :: Γ, x : A ⊢ e⃗_K : TVoid +``` +-/ +partial def projIfThenElse (dest : Option StmtExprMd) 
(md : Md) (cond : FGLValue) + (thn els after : FGLProducer) : ProjM (List StmtExprMd) := do + let thnStmts ← proj dest thn + let elsStmts ← proj dest els + let thnBlock := mkLaurel md (.Block thnStmts none) + let elsBlock := mkLaurel md (.Block elsStmts none) + let ite := mkLaurel md (.IfThenElse (projectValue cond) thnBlock (some elsBlock)) + let afterStmts ← proj dest after + pure ([ite] ++ afterStmts) + +/-- projWhileLoop: +``` +D :: ⟦Γ⟧ ⊢ whileLoop V M K ⇐ ⟦A⟧ & d +├─ D_V :: ⟦Γ⟧ ⊢ V ⇐ bool +├─ D_M :: ⟦Γ⟧ ⊢ M ⇐ ⟦A⟧ & d +└─ D_K :: ⟦Γ⟧ ⊢ K ⇐ ⟦A⟧ & d + + ↦ + +⟦D⟧ₓ⁻¹ :: Γ, x : A ⊢ (while e_V {e⃗_M}); e⃗_K : TVoid [while] +├─ ⟦D_V⟧⁻¹ :: Γ ⊢ e_V : bool +├─ ⟦D_M⟧ₓ⁻¹ :: Γ, x : A ⊢ e⃗_M : TVoid +└─ ⟦D_K⟧ₓ⁻¹ :: Γ, x : A ⊢ e⃗_K : TVoid +``` +-/ +partial def projWhileLoop (dest : Option StmtExprMd) (md : Md) (cond : FGLValue) + (body after : FGLProducer) : ProjM (List StmtExprMd) := do + let bodyStmts ← proj dest body + let bodyBlock := mkLaurel md (.Block bodyStmts none) + let loop := mkLaurel md (.While (projectValue cond) [] none bodyBlock) + let afterStmts ← proj dest after + pure ([loop] ++ afterStmts) + +/-- projProcedureCall: +``` +D :: ⟦Γ⟧ ⊢ procedureCall f [Vᵢ] [outⱼ : Tⱼ] K ⇐ ⟦A⟧ & d +├─ D_Vᵢ :: ⟦Γ⟧ ⊢ Vᵢ ⇐ ⟦Aᵢ⟧ +└─ D_K :: ⟦Γ⟧, outⱼ:Tⱼ ⊢ K ⇐ ⟦A⟧ & d + + ↦ + +⟦D⟧ₓ⁻¹ :: Γ, x : A ⊢ (var out₁:T₁; ...; var outₙ:Tₙ; (out₁,...,outₙ) := f(e_Vᵢ); e⃗_K) : TVoid [call] +├─ ⟦D_Vᵢ⟧⁻¹ :: Γ ⊢ e_Vᵢ : Aᵢ +└─ ⟦D_K⟧ₓ⁻¹ :: Γ, x : A, out₁:T₁, ..., outₙ:Tₙ ⊢ e⃗_K : TVoid +``` +-/ +partial def projProcedureCall (dest : Option StmtExprMd) (md : Md) (callee : String) + (args : List FGLValue) (outputs : List (String × LowType)) (body : FGLProducer) : ProjM (List StmtExprMd) := do + for (n, ty) in outputs do + projDecl (mkLaurel md (.LocalVariable (Identifier.mk n none) (mkHighTypeMd md (liftType ty)) none)) + let targets := outputs.map fun (n, _) => mkLaurel md (.Identifier (Identifier.mk n none)) + let call := mkLaurel md (.Assign targets (mkLaurel md (.StaticCall (Identifier.mk callee 
none) (args.map projectValue)))) + let bodyStmts ← proj dest body + pure ([call] ++ bodyStmts) + +/-- projAssert: +``` +D :: ⟦Γ⟧ ⊢ assert V K ⇐ ⟦A⟧ & d +├─ D_V :: ⟦Γ⟧ ⊢ V ⇐ bool +└─ D_K :: ⟦Γ⟧ ⊢ K ⇐ ⟦A⟧ & d + + ↦ + +⟦D⟧ₓ⁻¹ :: Γ, x : A ⊢ (assert e_V); e⃗_K : TVoid [assert] +├─ ⟦D_V⟧⁻¹ :: Γ ⊢ e_V : bool +└─ ⟦D_K⟧ₓ⁻¹ :: Γ, x : A ⊢ e⃗_K : TVoid +``` +-/ +partial def projAssert (dest : Option StmtExprMd) (md : Md) (cond : FGLValue) + (body : FGLProducer) : ProjM (List StmtExprMd) := do + let bodyStmts ← proj dest body + pure ([mkLaurel md (.Assert (projectValue cond))] ++ bodyStmts) + +/-- projAssume: +``` +D :: ⟦Γ⟧ ⊢ assume V K ⇐ ⟦A⟧ & d +├─ D_V :: ⟦Γ⟧ ⊢ V ⇐ bool +└─ D_K :: ⟦Γ⟧ ⊢ K ⇐ ⟦A⟧ & d + + ↦ + +⟦D⟧ₓ⁻¹ :: Γ, x : A ⊢ (assume e_V); e⃗_K : TVoid [assume] +├─ ⟦D_V⟧⁻¹ :: Γ ⊢ e_V : bool +└─ ⟦D_K⟧ₓ⁻¹ :: Γ, x : A ⊢ e⃗_K : TVoid +``` +-/ +partial def projAssume (dest : Option StmtExprMd) (md : Md) (cond : FGLValue) + (body : FGLProducer) : ProjM (List StmtExprMd) := do + let bodyStmts ← proj dest body + pure ([mkLaurel md (.Assume (projectValue cond))] ++ bodyStmts) + +/-- projLabeledBlock: +``` +D :: ⟦Γ⟧ ⊢ labeledBlock l M K ⇐ ⟦A⟧ & d +├─ D_M :: ⟦Γ⟧, l ⊢ M ⇐ ⟦A⟧ & d +└─ D_K :: ⟦Γ⟧ ⊢ K ⇐ ⟦A⟧ & d + + ↦ + +⟦D⟧ₓ⁻¹ :: Γ, x : A ⊢ {e⃗_M}_l; e⃗_K : TVoid [labeledBlock] +├─ ⟦D_M⟧ₓ⁻¹ :: Γ, x : A, l ⊢ e⃗_M : TVoid +└─ ⟦D_K⟧ₓ⁻¹ :: Γ, x : A ⊢ e⃗_K : TVoid +``` +-/ +partial def projLabeledBlock (dest : Option StmtExprMd) (md : Md) (label : String) + (body after : FGLProducer) : ProjM (List StmtExprMd) := do + let bodyStmts ← proj dest body + let bodyBlock := mkLaurel md (.Block bodyStmts (some label)) + let afterStmts ← proj dest after + pure ([bodyBlock] ++ afterStmts) + +/-- projExit: +``` +D :: ⟦Γ⟧ ⊢ exit l ⇐ ⟦A⟧ & d + + ↦ + +⟦D⟧ₓ⁻¹ :: Γ, x : A ⊢ exit l : TVoid [exit] +└─ l ∈ Γ +``` +-/ +partial def projExit (md : Md) (label : String) : ProjM (List StmtExprMd) := + pure [mkLaurel md (.Exit label)] + +/-- projSkip: +``` +⟦skip⟧ₓ⁻¹ :: Γ, x : A ⊢ skip : TVoid [skip] +``` +-/ +partial 
def projSkip : ProjM (List StmtExprMd) := pure [] + +end + +/-- Run projection of a procedure body. The body is a command (`TVoid`), so it + has no destination: its return value reaches `LaurelResult` only through the + explicit `LaurelResult := e` assignments Translation emits for `return e`, not + through a tail value. Declarations hoisted to top. -/ +def projectProducer (prod : FGLProducer) : List StmtExprMd := + let (stmts, decls) := (proj none prod).run + decls ++ stmts + +/-- Run projection, return as a block. -/ +def projectBody (md : Md) (prod : FGLProducer) : StmtExprMd := + mkLaurel md (.Block (projectProducer prod) none) + +/-! ## Entry Point + +`fullElaborate` orchestrates both passes. Pass 1 iterates to a fixpoint on +grades. Pass 2 elaborates each procedure at its final grade and projects +back to Laurel. Also emits auxiliary datatypes (TypeTag, Composite, Field, +Box) and hole procedure declarations needed by the output program. -/ + +/-- Entry point: elaborates a Laurel program. Returns the elaborated program + and a list of procedure names that failed to elaborate (emitted unchanged). 
-/
+def fullElaborate (program : Laurel.Program) (runtime : Laurel.Program := default) (initialGrades : Std.HashMap String Grade := {}) : Except String (Laurel.Program × List String) := do
+  let typeEnv := buildElabEnvFromProgram program runtime
+  let baseEnv : ElabEnv := { typeEnv := typeEnv, program := program, runtime := runtime }
+
+  -- PASS 1: Coinductive fixpoint iteration
+  -- Sweep over all procedures until a full sweep leaves `knownGrades` unchanged;
+  -- each sweep elaborates against the grades recorded so far.
+  let mut knownGrades : Std.HashMap String Grade := initialGrades
+  let mut changed := true
+  while changed do
+    changed := false
+    for proc in program.staticProcedures do
+      -- Grades are inferred for transparent bodies and for opaque procedures
+      -- that carry an implementation; every other body shape is skipped.
+      let bodyOpt := match proc.body with
+        | .Transparent b => some b
+        | .Opaque _ (some impl) _ => some impl
+        | _ => none
+      match bodyOpt with
+      | some bodyExpr =>
+        -- Inputs and outputs are both visible as variables inside the body.
+        let extEnv := (proc.inputs ++ proc.outputs).foldl
+          (fun (e : ElabTypeEnv) p => { e with names := e.names.insert p.name.text (.variable p.type.val) }) typeEnv
+        let inputList := proc.inputs.map fun p => (p.name.text, p.type.val)
+        let procEnv : ElabEnv := { baseEnv with typeEnv := extEnv, procGrades := knownGrades, procInputs := inputList }
+        -- The body is a command (DPS): checked at TVoid, not the return type. The
+        -- return value flows only through explicit `LaurelResult := e` assigns.
+        match tryGrades proc.name.text procEnv bodyExpr .TVoid [.pure, .proc, .err, .heap, .heapErr] with
+        | some g =>
+          -- A procedure declaring more than one output is graded at least `err`.
+          let g := if proc.outputs.length > 1 then Grade.join g .err else g
+          if knownGrades[proc.name.text]? != some g then
+            knownGrades := knownGrades.insert proc.name.text g
+            changed := true
+        | none => pure ()
+      | none => pure ()
+
+  -- PASS 2: Elaborate each proc with final grades
+  let mut procs : List Laurel.Procedure := []
+  let mut allBoxConstructors : List (String × String × HighType) := []
+  let mut allHoles : List (String × Bool × List (String × HighType) × HighType) := []
+  let mut elabFailures : List String := []
+  -- Fresh-name counter threaded through every elaboration run of this pass.
+  let mut globalCounter : Nat := 0
+  for proc in program.staticProcedures do
+    match proc.body with
+    | .Transparent bodyExpr =>
+      let extEnv := (proc.inputs ++ proc.outputs).foldl
+        (fun (e : ElabTypeEnv) p => { e with names := e.names.insert p.name.text (.variable p.type.val) }) typeEnv
+      let inputList := proc.inputs.map fun p => (p.name.text, p.type.val)
+      let procEnv : ElabEnv := { baseEnv with typeEnv := extEnv, procGrades := knownGrades, procInputs := inputList }
+      -- A procedure with no inferred grade is elaborated as `pure`.
+      let g := knownGrades[proc.name.text]?.getD .pure
+      -- Elaborate preconditions: a `requires` is a pure value of type bool, not an
+      -- effect-sequenced statement, so it elaborates with the value judgment
+      -- (checkValue) rather than the producer judgment. checkValue synthesizes the
+      -- term and applies subtyping coercions — from_int/from_str on argument
+      -- literals (the runtime operators take Any parameters) and Any_to_bool on the
+      -- Any-typed result — then projectValue yields the single Core expression.
+      -- Holes are collected as for bodies.
+      let mut elabPreconditions : List (WithMetadata StmtExpr) := []
+      for pre in proc.preconditions do
+        let preSt : ElabState := { freshCounter := globalCounter }
+        match (checkValue pre .TBool).run procEnv |>.run preSt with
+        | some (preVal, preSt') =>
+          globalCounter := preSt'.freshCounter
+          let newHoles := (preSt'.usedHoles.map fun (name, det, outTy) => (name, det, inputList, outTy)).filter
+            (fun (n, _, _, _) => !allHoles.any (fun (n2, _, _, _) => n == n2))
+          allHoles := allHoles ++ newHoles
+          elabPreconditions := elabPreconditions ++ [⟨(projectValue preVal).val, pre.md⟩]
+        -- A precondition that fails to elaborate is kept as written.
+        | none => elabPreconditions := elabPreconditions ++ [pre]
+      let proc := { proc with preconditions := elabPreconditions }
+      -- Build the body's state only now. `globalCounter` may have been advanced
+      -- by the precondition runs above; seeding `freshCounter` from the value it
+      -- had before that loop would let the body re-issue names the preconditions
+      -- already consumed (and a same-named hole would then be discarded by the
+      -- by-name de-duplication of `allHoles` below).
+      let st : ElabState := {
+        freshCounter := globalCounter
+        heapVar := if g == .heap || g == .heapErr then some "$heap" else none }
+      match (checkProducer bodyExpr [] .TVoid g).run procEnv |>.run st with
+      | some (fgl, st') =>
+        globalCounter := st'.freshCounter
+        -- Box constructors and holes are accumulated program-wide, first one wins.
+        allBoxConstructors := allBoxConstructors ++ st'.usedBoxConstructors.filter
+          (fun (c, _, _) => !allBoxConstructors.any (fun (c2, _, _) => c == c2))
+        let newHoles := (st'.usedHoles.map fun (name, det, outTy) => (name, det, inputList, outTy)).filter
+          (fun (n, _, _, _) => !allHoles.any (fun (n2, _, _, _) => n == n2))
+        allHoles := allHoles ++ newHoles
+        let projected := projectBody bodyExpr.md fgl
+        let md := bodyExpr.md
+        let heapInParam : Laurel.Parameter := { name := Identifier.mk "$heap_in" none, type := mkHighTypeMd md .THeap }
+        let heapOutParam : Laurel.Parameter := { name := Identifier.mk "$heap" none, type := mkHighTypeMd md .THeap }
+        let errOutParam : Laurel.Parameter := { name := Identifier.mk "maybe_except" none, type := mkHighTypeMd md (.TCore "Error") }
+        -- Declared outputs minus any Error-typed one; the error channel, when the
+        -- grade needs it, is re-added as `maybe_except`.
+        let resultOutputs := proc.outputs.filter fun o => eraseType o.type.val != .TCore "Error"
+        -- The grade selects the calling convention: heap grades gain a `$heap_in`
+        -- input and `$heap` output (initialised from `$heap_in` at the top of the
+        -- body); error grades gain the trailing `maybe_except` output.
+        match g with
+        | .heap =>
+          let heapInit := mkLaurel md (.Assign [mkLaurel md (.Identifier (Identifier.mk "$heap" none))] (mkLaurel md (.Identifier (Identifier.mk "$heap_in" none))))
+          let newBody := mkLaurel md (.Block ([heapInit] ++ (projectProducer fgl)) none)
+          procs := procs ++ [{ proc with
+            inputs := [heapInParam] ++ proc.inputs
+            outputs := [heapOutParam] ++ resultOutputs
+            body := .Transparent newBody }]
+        | .heapErr =>
+          let heapInit := mkLaurel md (.Assign [mkLaurel md (.Identifier (Identifier.mk "$heap" none))] (mkLaurel md (.Identifier (Identifier.mk "$heap_in" none))))
+          let newBody := mkLaurel md (.Block ([heapInit] ++ (projectProducer fgl)) none)
+          procs := procs ++ [{ proc with
+            inputs := [heapInParam] ++ proc.inputs
+            outputs := [heapOutParam] ++ resultOutputs ++ [errOutParam]
+            body := .Transparent newBody }]
+        | .err =>
+          procs := procs ++ [{ proc with
+            outputs := resultOutputs ++ [errOutParam]
+            body := .Transparent projected }]
+        | .proc | .pure =>
+          procs := procs ++ [{ proc with body := .Transparent projected }]
+      | none =>
+        -- NOTE(review): `proc` here already carries `elabPreconditions`, so a
+        -- failed procedure keeps its original body but not necessarily its
+        -- original preconditions.
+        elabFailures := elabFailures ++ [proc.name.text]
+        procs := procs ++ [proc]
+    | _ => procs := procs ++ [proc]
+  -- Auxiliary declarations for the output program.
+  let hasHeap := knownGrades.toList.any fun (_, g) => g == .heap || g == .heapErr
+  let compositeNames := typeEnv.classFields.toList.map (·.1)
+  let typeTagDatatype : TypeDefinition := .Datatype {
+    name := "TypeTag", typeArgs := [],
+    constructors := compositeNames.map fun n => { name := Identifier.mk (n ++ "_TypeTag") none, args := [] } }
+  let compositeType : TypeDefinition := .Datatype {
+    name := "Composite", typeArgs := [],
+    constructors := [{ name := Identifier.mk "MkComposite" none, args := [
+      { name := Identifier.mk "ref" none, type := ⟨.TInt, #[]⟩ },
+      { name := Identifier.mk "typeTag" none, type := ⟨.UserDefined "TypeTag", #[]⟩ }] }] }
+  let fieldConstructors := typeEnv.classFields.toList.foldl (fun acc (className, fields) =>
+    acc ++ fields.map fun (fieldName, _) =>
+      { name := Identifier.mk ("$field." ++ className ++ "." ++ fieldName) none, args := [] : DatatypeConstructor }) []
+  let fieldDatatype : TypeDefinition := .Datatype {
+    name := "Field", typeArgs := [], constructors := fieldConstructors }
+  let boxConstructors := allBoxConstructors.map fun (ctorName, _, ty) =>
+    { name := Identifier.mk ctorName none, args := [
+      { name := Identifier.mk (boxFieldName ty) none, type := ⟨boxFieldType ty, #[]⟩ }] : DatatypeConstructor }
+  let boxDatatype : TypeDefinition := .Datatype {
+    name := "Box", typeArgs := [], constructors := boxConstructors }
+  -- One opaque functional procedure per collected hole. A deterministic hole
+  -- takes the inputs of the procedure it was found in; a nondeterministic one
+  -- takes none.
+  let holeProcs := allHoles.map fun (name, deterministic, inputs, outTy) =>
+    let params := inputs.map fun (pName, pType) =>
+      ({ name := Identifier.mk pName none, type := ⟨pType, #[]⟩ } : Laurel.Parameter)
+    let outputParam : Laurel.Parameter := { name := Identifier.mk "result" none, type := ⟨outTy, #[]⟩ }
+    { name := Identifier.mk name none
+      inputs := if deterministic then params else []
+      outputs := [outputParam]
+      preconditions := []
+      determinism := if deterministic then .deterministic none else .nondeterministic
+      decreases := none
+      isFunctional := true
+      body := .Opaque [] none []
+      md := #[] : Laurel.Procedure }
+  -- The heap runtime (and the Field/Box datatypes) is only emitted when some
+  -- procedure has a heap grade. The runtime's own `Composite` and
+  -- `NotSupportedYet` datatypes are dropped in favour of the ones built above.
+  let result := if hasHeap then
+    let heapTypesFiltered := heapConstants.types.filter fun td => match td with
+      | .Datatype dt => dt.name.text != "Composite" && dt.name.text != "NotSupportedYet"
+      | _ => true
+    { program with
+      staticProcedures := holeProcs ++ coreDefinitionsForLaurel.staticProcedures ++ heapConstants.staticProcedures ++ procs
+      types := [fieldDatatype, boxDatatype, typeTagDatatype, compositeType] ++ heapTypesFiltered ++ coreDefinitionsForLaurel.types ++ program.types }
+  else
+    { program with
+      staticProcedures := holeProcs ++ coreDefinitionsForLaurel.staticProcedures ++ procs
+      types := [typeTagDatatype, compositeType] ++ coreDefinitionsForLaurel.types ++ program.types }
+  pure (result, elabFailures)
+
+end
+end Strata.FineGrainLaurel
+
diff --git a/Strata/Languages/FineGrainLaurel/FineGrainLaurel.dialect.st
b/Strata/Languages/FineGrainLaurel/FineGrainLaurel.dialect.st new file mode 100644 index 0000000000..eb2448a1b4 --- /dev/null +++ b/Strata/Languages/FineGrainLaurel/FineGrainLaurel.dialect.st @@ -0,0 +1,213 @@ +// FineGrainLaurel Dialect: FGCBV (Fine-Grain Call-By-Value) with explicit polarity +// This dialect extends Laurel with separate Value and Producer categories, +// making polarity a representation-level invariant rather than a runtime predicate. +// +// Changes in this file are not automatically tracked by the build system. +// Modify FineGrainLaurel.lean (e.g. update its comment) to trigger a rebuild after changing this file. + +dialect FineGrainLaurel; +// Note: Not importing Laurel for now - FineGrainLaurel is self-contained + +// Import Laurel types for reuse +category LaurelType; +op intType : LaurelType => "int"; +op boolType : LaurelType => "bool"; +op realType : LaurelType => "real"; +op float64Type : LaurelType => "float64"; +op stringType : LaurelType => "string"; +op coreType (name: Ident): LaurelType => "Core " name; +op mapType (keyType: LaurelType, valueType: LaurelType): LaurelType => "Map " keyType " " valueType; +op compositeType (name: Ident): LaurelType => name; + +// =========================================================================== +// FGCBV Core: Separate Value and Producer categories +// =========================================================================== + +// Value category: inert terms (no effects, can be duplicated/discarded) +category Value; + +// Producer category: effectful terms (must be sequenced, single-use) +category Producer; + +// =========================================================================== +// Value Operators (Inert Terms) +// =========================================================================== + +// Literals +op valLiteralInt (n: Num): Value => n; +op valLiteralBool (b: Bool): Value => b; +op valLiteralReal (d: Decimal): Value => d; +op valLiteralString (s: Str): Value => s; + +// 
Variables +op valVar (name: Ident): Value => name; + +// Pure binary operations (no effects) +op valAdd (lhs: Value, rhs: Value): Value => @[prec(60), leftassoc] lhs " + " rhs; +op valSub (lhs: Value, rhs: Value): Value => @[prec(60), leftassoc] lhs " - " rhs; +op valMul (lhs: Value, rhs: Value): Value => @[prec(70), leftassoc] lhs " * " rhs; +op valDiv (lhs: Value, rhs: Value): Value => @[prec(70), leftassoc] lhs " / " rhs; +op valMod (lhs: Value, rhs: Value): Value => @[prec(70), leftassoc] lhs " % " rhs; + +// Pure comparison operations +op valEq (lhs: Value, rhs: Value): Value => @[prec(40)] lhs " == " rhs; +op valNeq (lhs: Value, rhs: Value): Value => @[prec(40)] lhs " != " rhs; +op valLt (lhs: Value, rhs: Value): Value => @[prec(40)] lhs " < " rhs; +op valLe (lhs: Value, rhs: Value): Value => @[prec(40)] lhs " <= " rhs; +op valGt (lhs: Value, rhs: Value): Value => @[prec(40)] lhs " > " rhs; +op valGe (lhs: Value, rhs: Value): Value => @[prec(40)] lhs " >= " rhs; + +// Pure logical operations +op valAnd (lhs: Value, rhs: Value): Value => @[prec(30), leftassoc] lhs " & " rhs; +op valOr (lhs: Value, rhs: Value): Value => @[prec(20), leftassoc] lhs " | " rhs; +op valNot (inner: Value): Value => @[prec(80)] "!" 
inner; + +// Pure unary operations +op valNeg (inner: Value): Value => @[prec(80)] "-" inner; + +// Field access (pure) +op valFieldAccess (obj: Value, field: Ident): Value => @[prec(90)] obj "#" field; + +// Parenthesis (for grouping) +op valParens (inner: Value): Value => "(" inner ")"; + +// =========================================================================== +// Producer Operators (Effectful Terms) +// =========================================================================== + +// Return a value (terminal producer) +op prodReturnValue (value: Value): Producer => @[prec(0)] "return " value:0; + +// Call a procedure (effectful) +op prodCall (callee: Ident, args: CommaSepBy Value): Producer => callee "(" args ")"; + +// Let-binding for producers (sequence effects) +// let x: ty = prod in body +op prodLetProd (var: Ident, ty: LaurelType, prod: Producer, body: Producer): Producer => + @[prec(0)] "let " var ": " ty " = " prod:0 " in " body:0; + +// Let-binding for values (introduce binding for a value) +// let x: ty = value in body +op prodLetValue (var: Ident, ty: LaurelType, value: Value, body: Producer): Producer => + @[prec(0)] "let " var ": " ty " = " value:0 " in " body:0; + +// Assignment (mutation) +op prodAssign (target: Value, value: Value, body: Producer): Producer => + @[prec(0)] target " := " value:0 ";" body:0; + +// Variable declaration with initialization +op prodVarDecl (name: Ident, ty: LaurelType, init: Value, body: Producer): Producer => + @[prec(0)] "var " name ": " ty " := " init:0 ";" body:0; + +// Conditional (if-then-else) +op prodIfThenElse (cond: Value, thenBranch: Producer, elseBranch: Producer): Producer => + @[prec(0)] "if " cond " then " thenBranch:0 " else " elseBranch:0; + +// Assert (specification) +op prodAssert (cond: Value, body: Producer): Producer => + @[prec(0)] "assert " cond:0 ";" body:0; + +// Assume (specification) +op prodAssume (cond: Value, body: Producer): Producer => + @[prec(0)] "assume " cond:0 ";" body:0; + 
+// While loop +category Invariant; +op invariant (cond: Value): Invariant => "invariant " cond:0; + +op prodWhile (cond: Value, invariants: Seq Invariant, body: Producer, after: Producer): Producer => + @[prec(0)] "while (" cond ")" invariants " " body:0 after:0; + +// Instantiation (heap allocation) +op prodNew (name: Ident, resultVar: Ident, ty: LaurelType, body: Producer): Producer => + @[prec(0)] "let " resultVar ": " ty " = new " name " in " body:0; + +// Call with error handling +op prodCallWithError (callee: Ident, args: CommaSepBy Value, + resultVar: Ident, errorVar: Ident, + resultTy: LaurelType, errorTy: LaurelType, + body: Producer): Producer => + @[prec(0)] "let [" resultVar ": " resultTy ", " errorVar ": " errorTy "] = " callee "(" args ") in " body:0; + +// Sequence (statement sequencing) +op prodSeq (first: Producer, second: Producer): Producer => + @[prec(5)] first:5 ";" second:5; + +// Block with multiple producers +op prodBlock (stmts: SemicolonSepBy Producer): Producer => + @[prec(1000)] "{" stmts "}"; + +// Exit a labelled block (break/continue control flow) +op prodExit (label: Str): Producer => "exit " label; + +// Labeled block (target of prodExit — models break/continue) +op prodLabeledBlock (label: Str, body: Producer): Producer => + @[prec(0)] "block " label " {" body:0 "}"; + +// =========================================================================== +// Top-level Declarations (reuse Laurel structure) +// =========================================================================== + +category Parameter; +op parameter (name: Ident, paramType: LaurelType): Parameter => name ":" paramType; + +category ReturnParameters; +op returnParameters (parameters: CommaSepBy Parameter): ReturnParameters => "returns" "(" parameters ")"; + +category ErrorSummary; +op errorSummary (msg: Str): ErrorSummary => "summary" msg; + +category RequiresClause; +op requiresClause (cond: Value, errorMessage: Option ErrorSummary): RequiresClause => "requires" 
cond:0 errorMessage; + +category EnsuresClause; +op ensuresClause (cond: Value, errorMessage: Option ErrorSummary): EnsuresClause => "ensures" cond:0 errorMessage; + +category ModifiesClause; +op modifiesClause (refs: CommaSepBy Value): ModifiesClause => "modifies" refs; + +category ProcedureBody; +op procedureBody (body: Producer): ProcedureBody => body:0; +op externalBody: ProcedureBody => "external"; + +category Procedure; +op procedure (name: Ident, parameters: CommaSepBy Parameter, + returnParameters: Option ReturnParameters, + requires: Seq RequiresClause, + ensures: Seq EnsuresClause, + modifies: Seq ModifiesClause, + body: Option ProcedureBody): Procedure => + "procedure " name "(" parameters ")" returnParameters requires ensures modifies body ";"; + +category Field; +op mutableField (name: Ident, fieldType: LaurelType): Field => "var " name ":" fieldType; +op immutableField (name: Ident, fieldType: LaurelType): Field => name ":" fieldType; + +category Extends; +op extends (parents: CommaSepBy Ident): Extends => "extends " parents; + +category Composite; +op composite (name: Ident, extending: Option Extends, fields: Seq Field, procedures: Seq Procedure): Composite => + "composite " name extending "{" fields procedures "}"; + +// =========================================================================== +// Value-Level Coercion Operators (Subtyping: infallible, value→value) +// =========================================================================== + +// Upcasts: inject concrete types into Any (pure injections into the sum type) +op valFromInt (inner: Value): Value => "from_int(" inner ")"; +op valFromStr (inner: Value): Value => "from_str(" inner ")"; +op valFromBool (inner: Value): Value => "from_bool(" inner ")"; +op valFromFloat (inner: Value): Value => "from_float(" inner ")"; +op valFromComposite (inner: Value): Value => "from_Composite(" inner ")"; +op valFromListAny (inner: Value): Value => "from_ListAny(" inner ")"; +op valFromDictStrAny 
(inner: Value): Value => "from_DictStrAny(" inner ")"; +op valFromNone: Value => "from_None()"; + +// =========================================================================== +// Top-level Declarations +// =========================================================================== + +// Top-level commands +op compositeCommand (composite: Composite): Command => composite; +op procedureCommand (procedure: Procedure): Command => procedure; diff --git a/Strata/Languages/FineGrainLaurel/FineGrainLaurel.lean b/Strata/Languages/FineGrainLaurel/FineGrainLaurel.lean new file mode 100644 index 0000000000..74d74ec686 --- /dev/null +++ b/Strata/Languages/FineGrainLaurel/FineGrainLaurel.lean @@ -0,0 +1,24 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ +-- FineGrainLaurel dialect definition, loaded from FineGrainLaurel.dialect.st +-- NOTE: Changes to FineGrainLaurel.dialect.st are not automatically tracked by the build system. +-- Update this file (e.g. this comment) to trigger a recompile after modifying FineGrainLaurel.dialect.st. +-- Last grammar change: added prodExit for break/continue control flow preservation. 
+ +module + +public import Strata.DDM.Integration.Lean +public meta import Strata.DDM.Integration.Lean + +namespace Strata.FineGrainLaurel + +public section + +#load_dialect "./FineGrainLaurel.dialect.st" + +#strata_gen FineGrainLaurel + +end diff --git a/Strata/Languages/Laurel/FilterPrelude.lean b/Strata/Languages/Laurel/FilterPrelude.lean index ca5494a0cc..f3d6f4df6a 100644 --- a/Strata/Languages/Laurel/FilterPrelude.lean +++ b/Strata/Languages/Laurel/FilterPrelude.lean @@ -69,7 +69,7 @@ private def addTypeName (name : String) : CollectM Unit := private partial def collectHighTypeNames (ty : HighTypeMd) : CollectM Unit := do match ty.val with | .UserDefined name => addTypeName name.text - | .TCore _ => pure () + | .TCore name => addTypeName name | .TTypedField vt => collectHighTypeNames vt | .TSet et => collectHighTypeNames et | .TMap kt vt => collectHighTypeNames kt; collectHighTypeNames vt diff --git a/Strata/Languages/Laurel/HeapParameterizationConstants.lean b/Strata/Languages/Laurel/HeapParameterizationConstants.lean index 758aa149a1..4a55009383 100644 --- a/Strata/Languages/Laurel/HeapParameterizationConstants.lean +++ b/Strata/Languages/Laurel/HeapParameterizationConstants.lean @@ -16,20 +16,40 @@ namespace Strata.Laurel public section /-- -The Laurel Core prelude defines the heap model types and operations -used by the Laurel-to-Core translator. These declarations are expressed -in Laurel syntax via the `#strata program Laurel` macro and parsed into -a `Laurel.Program` at compile time. 
- + +The heap model uses: +- `Composite` - datatype with a reference (int) and a runtime type tag +- `Field` - abstract type for field names (zero-constructor datatype) +- `TypeTag` - abstract type for type tags (zero-constructor datatype) +- `Heap` - datatype with a `data` map and a `nextReference` for allocation +- `readField` / `updateField` / `increment` - heap access functions - -Note: The `Box` datatype is generated dynamically by `heapParameterization` -based on which field types are actually used in the program. +The heap model runtime interface. These are the types and functions that +elaboration relies on when translating field access, field write, and +heap allocation. + +Types: +``` +datatype Composite { MkComposite(ref: int, typeTag: TypeTag) } +datatype Heap { MkHeap(data: Map Composite (Map Field Box), nextReference: int) } +datatype Field { ... } (zero-arity constructors generated per class field) +datatype TypeTag { ... } (zero-arity constructors generated per class) +datatype Box { ... } (generated dynamically: BoxInt(intVal: int), BoxString(stringVal: string), etc.) +``` + +Functions (all pure, grade = pure): +``` +readField : (Heap, Composite, Field) → Box +updateField : (Heap, Composite, Field, Box) → Heap +increment : (Heap) → Heap +MkComposite : (int, TypeTag) → Composite +MkHeap : (Map …, int) → Heap +Heap..data! : (Heap) → Map Composite (Map Field Box) +Heap..nextReference! : (Heap) → int +``` + +Datatype accessors/testers follow the DDM pattern: +``` +$field.C.f : () → Field (zero-arity, one per class field) +C_TypeTag : () → TypeTag (zero-arity, one per class) +box_T : (T) → Box (e.g. BoxInt, BoxString, BoxComposite) +unbox_T : (Box) → T (e.g. Box..intVal!, Box..stringVal!) +``` + +Note: `Box` and `Field` constructors are generated dynamically by the +elaborator based on which field types and classes are actually used. -/ private def laurelPreludeDDM := @@ -66,7 +86,20 @@ function increment(heap: Heap): Heap { #end -/-- The Laurel Core prelude as a Laurel Program. 
-/ +/-- The heap model runtime as a Laurel Program. Elaboration looks up + these functions when translating field access, field write, and allocation. +``` +readField : (Heap, Composite, Field) → Box & pure +updateField : (Heap, Composite, Field, Box) → Heap & pure +increment : (Heap) → Heap & pure +MkComposite : (int, TypeTag) → Composite & pure +Heap..nextReference! : (Heap) → int & pure +$field.C.f : () → Field & pure (generated per class field) +C_TypeTag : () → TypeTag & pure (generated per class) +box_T : (T) → Box & pure (generated per field type used) +unbox_T : (Box) → T & pure (generated per field type used) +``` +-/ def heapConstants : Program := match Laurel.TransM.run none (Laurel.parseProgram laurelPreludeDDM) with | .ok program => program diff --git a/Strata/Languages/Laurel/LaurelToCoreTranslator.lean b/Strata/Languages/Laurel/LaurelToCoreTranslator.lean index 92c8eed29c..656f3433a2 100644 --- a/Strata/Languages/Laurel/LaurelToCoreTranslator.lean +++ b/Strata/Languages/Laurel/LaurelToCoreTranslator.lean @@ -53,7 +53,7 @@ def translateType (model : SemanticModel) (ty : HighTypeMd) : LMonoTy := | .TInt => LMonoTy.int | .TBool => LMonoTy.bool | .TString => LMonoTy.string - | .TVoid => LMonoTy.bool -- Using bool as placeholder for void + | .TVoid => .tcons "Any" [] -- void-returning procs return from_None() which is Any | .THeap => .tcons "Heap" [] | .TTypedField _ => .tcons "Field" [] | .TSet elementType => Core.mapTy (translateType model elementType) LMonoTy.bool @@ -65,6 +65,7 @@ def translateType (model : SemanticModel) (ty : HighTypeMd) : LMonoTy := | _ => .tcons "Composite" [] -- fallback for unresolved refs | .TCore s => .tcons s [] | .TReal => LMonoTy.real + | .TFloat64 => LMonoTy.real | .Unknown => .tcons "Any" [] -- TODO, abort execution since there is no valid Core type to translate Unknown to | _ => .tcons "NotSupportedYet" [] -- TODO, abort execution since there is no valid Core type to translate Unknown to termination_by ty.val @@ -810,6 
+811,80 @@ def translate (options: LaurelTranslateOptions) (program : Program): TranslateRe let (core, diags, _) := translateWithLaurel options program (core, diags) +/-- +Minimal Laurel-to-Core pipeline for V2: resolve + inferHoleTypes + Core translation. +Skips old lowering passes (heapParameterization, typeHierarchy, modifiesClauses, +eliminateHoles, desugarShortCircuit, liftExpressionAssignments, eliminateReturns, +constrainedTypeElim) — those are subsumed by Elaboration in the V2 pipeline. +(`resolve` + `inferHoleTypes` are old-resolver tech debt — see PythonDoc Tech Debt.) +-/ +def translateMinimal (options : LaurelTranslateOptions) (program : Program) : TranslateResultWithLaurel := + -- NOTE: coreDefinitionsForLaurel is already prepended by the elaborator's entry point + -- (`fullElaborate` in Strata/Languages/FineGrainLaurel/Elaborate.lean). + -- Do NOT prepend it again here — that causes duplicate procedure definitions. + -- Step 1: Resolve (build SemanticModel) + let result := resolve program + let resolutionErrors : List DiagnosticModel := if options.emitResolutionErrors then result.errors.toList else [] + let (program, model) := (result.program, result.model) + -- Step 2: inferHoleTypes (cleanup) + let program := inferHoleTypes model program + -- Re-resolve after inferHoleTypes to ensure model is up-to-date + let result := resolve program (some model) + let (program, model) := (result.program, result.model) + -- Step 3: Core translation + let initState : TranslateState := { model := model } + let translateToCore : TranslateM Core.Program := do + let model := (← get).model + let sccDecls := computeSccDecls program + let orderedDecls ← sccDecls.flatMapM (fun (procs, isRecursive) => do + let isFuncSCC := procs.all (·.isFunctional) + if isFuncSCC then + let funcs ← procs.mapM (translateProcedureToFunction options isRecursive) + if isRecursive then + let coreFuncs := funcs.filterMap (fun d => match d with + | .func f _ => some f + | _ => none) + return [Core.Decl.recFuncBlock coreFuncs mdWithUnknownLoc] + else + return 
funcs + else + procs.flatMapM fun proc => do + let axiomDecls : List Core.Decl ← match proc.invokeOn with + | none => pure [] + | some trigger => do + let axDecl? ← translateInvokeOnAxiom proc trigger + pure axDecl?.toList + let procDecl ← translateProcedure proc + return [Core.Decl.proc procDecl proc.md] ++ axiomDecls + ) + let constantDecls ← program.constants.mapM fun c => do + let coreTy := translateType model c.type + let body ← c.initializer.mapM (translateExpr ·) + return Core.Decl.func { + name := ⟨c.name.text, ()⟩ + typeArgs := [] + inputs := [] + output := coreTy + body := body + } mdWithUnknownLoc + let laurelDatatypes := program.types.filterMap fun td => match td with + | .Datatype dt => some dt + | _ => none + let ldatatypes := laurelDatatypes.map (translateDatatypeDefinition model) + let groups := groupDatatypes laurelDatatypes ldatatypes + let groupedDatatypeDecls := groups.map fun group => Core.Decl.type (.data group) mdWithUnknownLoc + -- Emit diagnostics for composite types that have instance procedures. 
+ for td in program.types do + if let .Composite ct := td then + for proc in ct.instanceProcedures do + emitDiagnostic $ proc.md.toDiagnostic + s!"Instance procedure '{proc.name.text}' on composite type '{ct.name.text}' is not yet supported" + DiagnosticType.NotYetImplemented + pure { decls := groupedDatatypeDecls ++ constantDecls ++ orderedDecls } + let (coreProgramOption, translateState) := runTranslateM initState translateToCore + let allDiagnostics := resolutionErrors ++ translateState.diagnostics + let coreProgramOption := if translateState.coreProgramHasSuperfluousErrors then none else coreProgramOption + (coreProgramOption, allDiagnostics, program) + /-- Verify a Laurel program using an SMT solver -/ diff --git a/Strata/Languages/Python/ExprTranslation.lean b/Strata/Languages/Python/ExprTranslation.lean new file mode 100644 index 0000000000..e945ac90fe --- /dev/null +++ b/Strata/Languages/Python/ExprTranslation.lean @@ -0,0 +1,353 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ +module + +public import Strata.Languages.Laurel.Laurel +public import Strata.Languages.Python.PythonDialect +public import Strata.Languages.Python.Resolution +import Strata.DDM.Util.SourceRange + +/-! +# Python Expression Translation (Type-Directed, Clean Implementation) + +Clean implementation from first principles: +- Trust user annotations → concrete types +- Type-directed translation (straightforward mapping) +- Proper metadata preservation +- No ad-hoc wrapping in Any + +## Critical Features Implemented +- Literals, variables +- Binary/unary/comparison/boolean operations +- Function calls (StaticCall to Laurel procedures) +- Attribute access (field selection) +- Subscript access (dict/list indexing) +- List/Dict/Tuple construction +- IfExp (ternary operator) +- F-strings (string concatenation) +-/ + +namespace Strata.Python.New + +open Laurel + +public section + +/-! 
## Error Types -/
+
+inductive TransError where
+  | unsupportedConstruct (msg : String) (ast : String)
+  | internalError (msg : String)
+  deriving Repr
+
+/-! ## Translation Context -/
+
+/-- Function/method signature for dispatch -/
+structure FuncSig where
+  name : String
+  paramNames : List String
+  deriving Inhabited
+
+structure TransContext where
+  filePath : String
+  -- Type environment: variable name → type name
+  typeEnv : Std.HashMap String String := {}
+  -- Function signatures: qualified name → param names (for kwarg resolution)
+  funcSigs : Std.HashMap String FuncSig := {}
+  -- Resolution environment from nanopass: classifies names structurally
+  resolvedEnv : ResolvedEnv := {}
+
+/-! ## Smart Constructors -/
+
+/-- Convert SourceRange to Laurel metadata -/
+def sourceRangeToMetaData (filePath : String) (sr : SourceRange) : Imperative.MetaData Core.Expression :=
+  let uri : Uri := .file filePath
+  let fileRangeElt := ⟨ Imperative.MetaData.fileRange, .fileRange ⟨ uri, sr ⟩ ⟩
+  #[fileRangeElt]
+
+/-- Smart constructor: Create StmtExprMd with source location -/
+def mkExpr (ctx : TransContext) (sr : SourceRange) (expr : StmtExpr) : StmtExprMd :=
+  { val := expr, md := sourceRangeToMetaData ctx.filePath sr }
+
+/-! ## Helper Functions -/
+
+/-- Build list construction (simplified - direct representation) -/
+def mkList (ctx : TransContext) (sr : SourceRange) (elements : List StmtExprMd) : StmtExprMd :=
+  -- Lists as procedure call: List_new(elem1, elem2, ...)
+  mkExpr ctx sr (.StaticCall "List_new" elements)
+
+/-- Build dict construction (simplified - direct representation) -/
+def mkDict (ctx : TransContext) (sr : SourceRange) (keys values : List StmtExprMd) : Except TransError StmtExprMd := do
+  if keys.length != values.length then
+    throw (.internalError "Dict keys/values length mismatch")
+  -- Dict as procedure call: Dict_new(k1, v1, k2, v2, ...)
+  let kvPairs := List.zip keys values
+  let flatArgs := kvPairs.flatMap (fun (k, v) => [k, v])
+  pure (mkExpr ctx sr (.StaticCall "Dict_new" flatArgs))
+
+/-- Build tuple construction (simplified - direct representation) -/
+def mkTuple (ctx : TransContext) (sr : SourceRange) (elements : List StmtExprMd) : StmtExprMd :=
+  -- Tuples as procedure call: Tuple_new(elem1, elem2, ...)
+  mkExpr ctx sr (.StaticCall "Tuple_new" elements)
+
+/-! ## Keyword Argument Resolution -/
+
+/-- Resolve keyword arguments against a function signature.
+
+    With no keyword arguments the positional list is returned as is. Otherwise
+    the callee is looked up in `ctx.funcSigs`:
+    - signature known: the parameters after the positional ones are walked in
+      declaration order and each keyword value is appended at its declared
+      position. A parameter with no keyword argument that is followed by one
+      that has a keyword argument is filled with a `Hole`, so later values are
+      not shifted into the wrong slot. Trailing parameters without a keyword
+      argument are left off. Keyword names that match no remaining parameter
+      are ignored.
+    - signature unknown: keyword values are appended in call order. -/
+def resolveArgs (ctx : TransContext) (funcName : String)
+    (posArgs : List StmtExprMd) (kwargs : List (String × StmtExprMd))
+    : Except TransError (List StmtExprMd) := do
+  if kwargs.isEmpty then
+    pure posArgs
+  else
+    -- Look up signature to determine parameter order
+    match ctx.funcSigs[funcName]? with
+    | some sig =>
+      -- Place kwargs in correct positions based on param names
+      let numPos := posArgs.length
+      let remainingParams := sig.paramNames.drop numPos
+      let mut ordered := posArgs
+      -- Parameters passed over since the last appended value.
+      let mut skipped : Nat := 0
+      for paramName in remainingParams do
+        match kwargs.find? (fun (name, _) => name == paramName) with
+        | some (_, val) =>
+          -- Pad the passed-over slots so `val` lands at its declared position.
+          -- NOTE(review): `Hole` stands in for the callee's default value, which
+          -- is not available here — confirm this is the intended placeholder.
+          let filler : StmtExprMd := { val := .Hole, md := val.md }
+          ordered := ordered ++ List.replicate skipped filler ++ [val]
+          skipped := 0
+        | none => skipped := skipped + 1 -- Optional param not provided
+      pure ordered
+    | none =>
+      -- No signature known: just append kwargs in order
+      pure (posArgs ++ kwargs.map (·.2))
+
+/-! ## Core Translation -/
+
+/-- Translate Python expression to Laurel StmtExpr.
+    Clean implementation with proper metadata preservation.
/-- Translate a Python expression to a Laurel `StmtExprMd`, attaching the
    expression's source range to every emitted node.

    Mapping (all operator names are `StaticCall` callee names):
    * literals → `LiteralInt` / `LiteralString` / `LiteralBool`; `None` →
      `StaticCall "None" []`; floats → `from_float(<literal text>)`;
      bytes / complex / ellipsis → `Hole`
    * binary, comparison, boolean and unary operators → `P*` calls
      (`@` and chained comparisons are rejected with `unsupportedConstruct`)
    * calls → dispatched on the result of `resolveCall`
    * subscripts → `Get(container, index)`; slices → `Slice_new(start, stop)`
    * list / tuple / dict literals → `List_new` / `Tuple_new` / `Dict_new`
    * f-strings → left fold of `StrConcat`, interpolations → `ToString`
    * every other expression form → `Hole` with its source location preserved -/
partial def translateExpr (ctx : TransContext) (e : Python.expr SourceRange)
    : Except TransError StmtExprMd := do
  match e with
  -- Literals
  | .Constant sr (.ConPos _ n) _ =>
    pure (mkExpr ctx sr (.LiteralInt n.val))
  | .Constant sr (.ConNeg _ n) _ =>
    pure (mkExpr ctx sr (.LiteralInt (-n.val)))
  | .Constant sr (.ConString _ s) _ =>
    pure (mkExpr ctx sr (.LiteralString s.val))
  | .Constant sr (.ConTrue _) _ =>
    pure (mkExpr ctx sr (.LiteralBool true))
  | .Constant sr (.ConFalse _) _ =>
    pure (mkExpr ctx sr (.LiteralBool false))
  | .Constant sr (.ConNone _) _ =>
    -- None as special constant (or could be Hole)
    pure (mkExpr ctx sr (.StaticCall "None" []))
  | .Constant sr (.ConBytes _ _) _ => pure (mkExpr ctx sr .Hole)
  | .Constant sr (.ConFloat _ f) _ =>
    -- Float: call `from_float` on the literal's string form; resolved later.
    pure (mkExpr ctx sr (.StaticCall "from_float" [mkExpr ctx sr (.LiteralString f.val)]))
  | .Constant sr (.ConComplex _ _ _) _ => pure (mkExpr ctx sr .Hole)
  | .Constant sr (.ConEllipsis _) _ => pure (mkExpr ctx sr .Hole)

  -- Variable references
  | .Name sr name _ =>
    pure (mkExpr ctx sr (.Identifier name.val))

  -- Binary operations
  | .BinOp sr left op right => do
    let leftExpr ← translateExpr ctx left
    let rightExpr ← translateExpr ctx right
    let preludeOp ← match op with
      | .Add _ => .ok "PAdd"
      | .Sub _ => .ok "PSub"
      | .Mult _ => .ok "PMul"
      | .Div _ => .ok "PDiv"
      | .FloorDiv _ => .ok "PFloorDiv"
      | .Mod _ => .ok "PMod"
      | .Pow _ => .ok "PPow"
      | .BitAnd _ => .ok "PBitAnd"
      | .BitOr _ => .ok "PBitOr"
      | .BitXor _ => .ok "PBitXor"
      | .LShift _ => .ok "PLShift"
      | .RShift _ => .ok "PRShift"
      | .MatMult _ => throw (.unsupportedConstruct "Matrix mult (@) not supported" "")
    pure (mkExpr ctx sr (.StaticCall preludeOp [leftExpr, rightExpr]))

  -- Comparison operations (exactly one operator; chains are rejected)
  | .Compare sr left ops comparators => do
    if ops.val.size != 1 || comparators.val.size != 1 then
      throw (.unsupportedConstruct "Chained comparisons not supported" "")
    let leftExpr ← translateExpr ctx left
    let rightExpr ← translateExpr ctx comparators.val[0]!
    let preludeOp ← match ops.val[0]! with
      | .Eq _ => .ok "PEq"
      | .NotEq _ => .ok "PNEq"
      | .Lt _ => .ok "PLt"
      | .LtE _ => .ok "PLe"
      | .Gt _ => .ok "PGt"
      | .GtE _ => .ok "PGe"
      | .In _ => .ok "PIn"
      | .NotIn _ => .ok "PNotIn"
      | .Is _ => .ok "PIs"
      | .IsNot _ => .ok "PIsNot"
    pure (mkExpr ctx sr (.StaticCall preludeOp [leftExpr, rightExpr]))

  -- Boolean operations
  | .BoolOp sr op values => do
    if values.val.size < 2 then
      throw (.internalError "BoolOp must have at least 2 operands")
    let preludeOp ← match op with
      | .And _ => .ok "PAnd"
      | .Or _ => .ok "POr"
    -- Translate all operands left to right (stops at the first error).
    let operands ← values.val.toList.mapM (translateExpr ctx)
    -- Chain left-associatively: a && b && c becomes (a && b) && c
    match operands with
    | first :: rest =>
      pure (rest.foldl
        (fun (acc : StmtExprMd) operand => mkExpr ctx sr (.StaticCall preludeOp [acc, operand]))
        first)
    | [] =>
      -- Unreachable after the size check above; kept so the match is total.
      throw (.internalError "BoolOp must have at least 2 operands")

  -- Unary operations
  | .UnaryOp sr op operand => do
    let operandExpr ← translateExpr ctx operand
    let preludeOp ← match op with
      | .Not _ => .ok "PNot"
      | .USub _ => .ok "PNeg"
      | .UAdd _ => .ok "PPos"
      | .Invert _ => .ok "PInvert"
    pure (mkExpr ctx sr (.StaticCall preludeOp [operandExpr]))

  -- Function/Method Call: resolved via nanopass (no name classification here)
  | .Call sr func args kwargs => do
    -- Resolve call structurally via resolution environment
    let resolved := resolveCall ctx.resolvedEnv sr func args.val kwargs.val
    -- Each branch of the resolved call determines the Laurel node
    match resolved with
    | .classNew className callArgs _callKwargs => do
      -- Resolution determined this is a class: structurally emit .New.
      -- Arguments are translated only so their errors surface; the values are
      -- not attached here (constructor args are passed to __init__ separately).
      let _translatedArgs ← callArgs.toList.mapM (translateExpr ctx)
      pure (mkExpr ctx sr (.New (Identifier.mk className none)))
    | .funcCall funcName callArgs callKwargs => do
      let posArgs ← callArgs.toList.mapM (translateExpr ctx)
      -- Keywords without a name (`**expr`) are translated, then left out.
      let kwargPairs ← callKwargs.toList.filterMapM (fun kw => do
        match kw with
        | .mk_keyword _ kwName kwExpr => do
          let val ← translateExpr ctx kwExpr
          match kwName.val with
          | some n => pure (some (n.val, val))
          | none => pure none)
      let allArgs ← resolveArgs ctx funcName posArgs kwargPairs
      pure (mkExpr ctx sr (.StaticCall funcName allArgs))
    | .methodCall receiver methodName callArgs callKwargs => do
      let objExpr ← translateExpr ctx receiver
      let posArgs ← callArgs.toList.mapM (translateExpr ctx)
      -- Keywords without a name (`**expr`) are translated, then left out.
      let kwargPairs ← callKwargs.toList.filterMapM (fun kw => do
        match kw with
        | .mk_keyword _ kwName kwExpr => do
          let val ← translateExpr ctx kwExpr
          match kwName.val with
          | some n => pure (some (n.val, val))
          | none => pure none)
      -- Qualify method name with receiver type; only plain-name receivers are
      -- looked up in `typeEnv`, everything else is treated as "Any".
      let receiverType := match receiver with
        | .Name _ name _ => ctx.typeEnv[name.val]?.getD "Any"
        | _ => "Any"
      let qualifiedName := s!"{receiverType}@{methodName}"
      -- The receiver is passed as the first positional argument.
      let allArgs ← resolveArgs ctx qualifiedName (objExpr :: posArgs) kwargPairs
      pure (mkExpr ctx sr (.StaticCall qualifiedName allArgs))

  -- Attribute access: obj.field
  | .Attribute sr obj attr _ => do
    let objExpr ← translateExpr ctx obj
    -- Direct field selection
    pure (mkExpr ctx sr (.FieldSelect objExpr attr.val))

  -- Subscript: dict[key] or list[i]
  | .Subscript sr container slice _ => do
    let containerExpr ← translateExpr ctx container
    let indexExpr ← match slice with
      | .Slice _ start stop step => do
        -- Slice notation: list[start:stop:step], translated as Slice_new(start, stop)
        let startE ← match start.val with
          | some e => translateExpr ctx e
          -- Omitted lower bound is 0 (a step is rejected below, so step is 1).
          | none => pure (mkExpr ctx sr (.LiteralInt 0))
        let stopE ← match stop.val with
          | some e => translateExpr ctx e
          -- Omitted upper bound means "to the end of the sequence". It must NOT
          -- be encoded as -1: `xs[a:-1]` excludes the last element. The length
          -- is not available here, so emit a Hole, as this file does for other
          -- values it cannot express.
          | none => pure (mkExpr ctx sr .Hole)
        if step.val.isSome then
          throw (.unsupportedConstruct "Slice step not supported" "")
        pure (mkExpr ctx sr (.StaticCall "Slice_new" [startE, stopE]))
      | _ => translateExpr ctx slice
    -- Subscript as operation: Get(container, index)
    pure (mkExpr ctx sr (.StaticCall "Get" [containerExpr, indexExpr]))

  -- List literal: [1, 2, 3]
  | .List sr elts _ => do
    let elements ← elts.val.toList.mapM (translateExpr ctx)
    pure (mkList ctx sr elements)

  -- Tuple literal: (1, 2, 3)
  | .Tuple sr elts _ => do
    let elements ← elts.val.toList.mapM (translateExpr ctx)
    pure (mkTuple ctx sr elements)

  -- Dict literal: {'a': 1, 'b': 2}
  | .Dict sr keys vals => do
    let keyExprs ← keys.val.toList.mapM (fun optKey => match optKey with
      | .some_expr _ e => translateExpr ctx e
      | _ => throw (.unsupportedConstruct "Dict with None key" ""))
    let valExprs ← vals.val.toList.mapM (translateExpr ctx)
    mkDict ctx sr keyExprs valExprs

  -- IfExp: x if cond else y (ternary operator)
  | .IfExp sr test body orelse => do
    let testExpr ← translateExpr ctx test
    let bodyExpr ← translateExpr ctx body
    let elseExpr ← translateExpr ctx orelse
    pure (mkExpr ctx sr (.IfThenElse testExpr bodyExpr elseExpr))

  -- F-string: f"{x} is {y}"
  | .JoinedStr sr values => do
    let parts ← values.val.toList.mapM (translateExpr ctx)
    -- Left fold starting from ""; with no parts this is just the empty literal.
    pure (parts.foldl
      (fun (acc : StmtExprMd) part => mkExpr ctx sr (.StaticCall "StrConcat" [acc, part]))
      (mkExpr ctx sr (.LiteralString "")))

  -- F-string interpolation: {expr}
  | .FormattedValue sr value _ _ => do
    let valueExpr ← translateExpr ctx value
    -- Convert value to string
    pure (mkExpr ctx sr (.StaticCall "ToString" [valueExpr]))

  -- Lambda: lambda x: x + 1 (treat as Hole for now - needs closure support)
  | .Lambda sr .. => pure (mkExpr ctx sr .Hole)

  -- Everything else: Hole (preserve source location)
  | .Set sr .. => pure (mkExpr ctx sr .Hole)
  | .ListComp sr .. => pure (mkExpr ctx sr .Hole)
  | .SetComp sr .. => pure (mkExpr ctx sr .Hole)
  | .DictComp sr .. => pure (mkExpr ctx sr .Hole)
  | .GeneratorExp sr .. => pure (mkExpr ctx sr .Hole)
  | .NamedExpr sr .. => pure (mkExpr ctx sr .Hole)
  | .Slice sr .. => pure (mkExpr ctx sr .Hole)
  | .Starred sr .. => pure (mkExpr ctx sr .Hole)
  | .Await sr .. => pure (mkExpr ctx sr .Hole)
  | .Yield sr .. => pure (mkExpr ctx sr .Hole)
  | .YieldFrom sr .. => pure (mkExpr ctx sr .Hole)
  | .TemplateStr sr .. => pure (mkExpr ctx sr .Hole)
  | .Interpolation sr .. => pure (mkExpr ctx sr .Hole)
/-- Run the V2 front half of the pipeline: Resolution → Translation →
    Elaboration, returning the elaborated user program combined with the
    filtered Python runtime as a single `Laurel.Program`. This function does
    not itself lower to Core.

    Steps:
    1. Read the Python AST from `pythonIonPath` (`Python.readPythonStrata`).
    2. Resolve it (`Python.Resolution.resolve`), using the directory of
       `pythonIonPath` as base directory.
    3. Translate to Laurel (`Python.Translation.runTranslation`): once for the
       demanded imported declarations, once for the user program. Demanded
       imported classes additionally become `Composite` type definitions.
    4. Elaborate user + imported procedures together
       (`FineGrainLaurel.fullElaborate`) against the full Python runtime.
    5. Filter the runtime prelude against the elaborated program
       (`Laurel.filterPrelude`).
    6. Combine filtered runtime and elaborated program (`combinePySpecLaurel`).

    `sourcePath`, when given, replaces `pythonIonPath` as the file path handed
    to translation. `profile` is forwarded to `profileStep`. `quiet` is
    currently unused.

    Errors: a translation `.userError` becomes `PipelineError.userCode`; every
    other failure becomes `PipelineError.internal`. -/
public def pyAnalyzeLaurelV2
    (pythonIonPath : String)
    (sourcePath : Option String := none)
    (profile : Bool := false)
    (quiet : Bool := false)
    : EIO PipelineError Laurel.Program := do
  -- quiet will be used when elaboration Phase 1 is enabled
  let _ := quiet

  -- Step 1: read the Python AST
  let pyStmts ← profileStep profile "Read Python Ion" do
    match ← Python.readPythonStrata pythonIonPath |>.toBaseIO with
    | .ok parsed => pure parsed
    | .error msg => throw (.internal msg)

  -- Step 2: resolution (scope the Python AST, loading imports on demand)
  let importRoot := System.FilePath.mk pythonIonPath |>.parent.getD "."
  let resolution ← profileStep profile "Resolution (scope Python AST)" do
    match ← (Python.Resolution.resolve pyStmts importRoot).toBaseIO with
    | .ok res => pure res
    | .error msg => throw (.internal s!"Resolution failed: {msg}")

  -- Step 3: translation. Imported stubs: only the methods/functions actually
  -- called (`demandedStmts`) are translated, as separate procedures.
  let laurelSourcePath := sourcePath.getD pythonIonPath
  let importedLaurel ← profileStep profile "Translate demanded imported decls" do
    let demandedProgram : Python.Resolution.ResolvedPythonProgram :=
      { stmts := resolution.demandedStmts, moduleLocals := [] }
    match Python.Translation.runTranslation demandedProgram laurelSourcePath with
    | .ok (prog, _) => pure prog
    -- NOTE(review): a failure here is discarded and replaced by an empty
    -- program, so missing imported procedures only surface later — confirm
    -- this best-effort behaviour is intended.
    | .error _ => pure ({ staticProcedures := [], staticFields := [], types := [], constants := [] } : Laurel.Program)

  -- Demanded imported classes become Composite type declarations whose
  -- fields are all mutable.
  let demandedTypes : List Laurel.TypeDefinition := resolution.demandedClasses.map fun (classId, classFields) =>
    let compositeFields : List Laurel.Field := classFields.map fun (fieldId, fieldTy) =>
      { name := fieldId.toLaurel, isMutable := true, type := Python.Translation.mkTypeDefault (Python.Translation.pythonTypeToHighType fieldTy) }
    .Composite { name := classId.toLaurel, extending := [], fields := compositeFields, instanceProcedures := [] }

  let userLaurel ← profileStep profile "Translate Python to Laurel (V2)" do
    match Python.Translation.runTranslation resolution.program laurelSourcePath with
    | .ok (program, _) => pure program
    | .error (.userError range msg) => throw (.userCode range msg)
    | .error e => throw (.internal s!"V2 Translation failed: {e}")

  -- Step 4: elaboration. Imported demanded methods/functions have real bodies
  -- (incl. stub holes), so they are elaborated alongside user code rather than
  -- treated as trusted, pre-elaborated runtime.
  let elaborationInput : Laurel.Program := {
    staticProcedures := userLaurel.staticProcedures ++ importedLaurel.staticProcedures
    staticFields := userLaurel.staticFields
    types := userLaurel.types ++ importedLaurel.types ++ demandedTypes
    constants := userLaurel.constants }
  let runtime : Laurel.Program := Python.pythonRuntimeLaurelPart
  let elaborated ← profileStep profile "Elaborate (full: coercions + type infrastructure)" do
    -- Grade of every runtime procedure, read off its signature.
    let gradeTable := runtime.staticProcedures.foldl
      (fun table proc => table.insert proc.name.text (FineGrainLaurel.gradeFromSignature proc))
      ({} : Std.HashMap String FineGrainLaurel.Grade)
    match FineGrainLaurel.fullElaborate elaborationInput runtime gradeTable with
    | .error e => throw (.internal s!"Elaboration failed: {e}")
    | .ok (prog, failures) =>
      if failures.isEmpty then
        pure prog
      else
        throw (.internal s!"Elaboration failed for: {String.intercalate ", " failures}")

  -- Step 5: filter prelude (remove unused procedures that would cause type errors in Core)
  let prunedRuntime ← profileStep profile "Filter prelude" do
    match Laurel.filterPrelude runtime elaborated with
    | .ok prog => pure prog
    | .error msg => throw (.internal msg)

  -- Step 6: combine with filtered runtime
  profileStep profile "Combine with runtime" do
    return combinePySpecLaurel prunedRuntime elaborated
ADDED }; +// Python truthiness per type: the subtyping coercions T <: bool. + +function int_to_bool (n: int) : bool { !(n == 0) }; +function str_to_bool (s: string) : bool { !(s == "") }; +function float_to_bool (f: real) : bool { !(f == 0.0) }; +function list_to_bool (l: ListAny) : bool { !(l == ListAny_nil()) }; +function dict_to_bool (d: DictStrAny) : bool { !(d == DictStrAny_empty()) }; + // ///////////////////////////////////////////////////////////////////////////////////// // ListAny functions // ///////////////////////////////////////////////////////////////////////////////////// @@ -531,6 +540,35 @@ function PNotIn ( v: Any, dictOrList: Any) : Any function is_IntReal (v: Any) : bool; function Any_real_to_int (v: Any) : int; +function Any_type_to_Any (v: Any) : Any; + +function Any_len_to_Any (v: Any) : Any; +function to_int_any (v: Any) : Any; +function to_float_any (v: Any) : Any; +function Any_abs_to_Any (v: Any) : Any; +function Any_isinstance_to_bool (v: Any, t: Any) : bool; +function Any_hasattr_to_bool (v: Any, name: Any) : bool; +function Any_getattr_to_Any (v: Any, name: Any) : Any; +function Any_setattr_to_Any (v: Any, name: Any, val: Any) : Any; +function Any_sorted_to_Any (v: Any) : Any; +function Any_reversed_to_Any (v: Any) : Any; +function Any_enumerate_to_Any (v: Any) : Any; +function Any_zip_to_Any (v: Any, w: Any) : Any; +function Any_range_to_Any (v: Any) : Any; +function Any_list_to_Any (v: Any) : Any; +function Any_dict_to_Any (v: Any) : Any; +function Any_set_to_Any (v: Any) : Any; +function Any_tuple_to_Any (v: Any) : Any; +function Any_min_to_Any (v: Any) : Any; +function Any_max_to_Any (v: Any) : Any; +function Any_sum_to_Any (v: Any) : Any; +function Any_any_to_bool (v: Any) : bool; +function Any_all_to_bool (v: Any) : bool; +function Any_ord_to_Any (v: Any) : Any; +function Any_chr_to_Any (v: Any) : Any; +function Any_map_to_Any (f: Any, v: Any) : Any; +function Any_filter_to_Any (f: Any, v: Any) : Any; + function normalize_any (v 
: Any) : Any { if v == from_bool(true) then from_int(1) else (if v == from_bool(false) then from_int(0) else @@ -694,6 +732,18 @@ function PMul (v1: Any, v2: Any) : Any exception(UndefinedError ("Operand Type is not defined")) }; +function PDiv (v1: Any, v2: Any) : Any; +function PBitAnd (v1: Any, v2: Any) : Any; +function PBitOr (v1: Any, v2: Any) : Any; +function PBitXor (v1: Any, v2: Any) : Any; +function PLShift (v1: Any, v2: Any) : Any; +function PRShift (v1: Any, v2: Any) : Any; +function PMatMul (v1: Any, v2: Any) : Any; +function PInvert (v1: Any) : Any; +function PPos (v1: Any) : Any; +function PIs (v1: Any, v2: Any) : bool; +function PIsNot (v1: Any, v2: Any) : bool; + function PFloorDiv (v1: Any, v2: Any) : Any requires (Any..isfrom_bool(v2)==>Any..as_bool!(v2)) && (Any..isfrom_int(v2)==>Any..as_int!(v2)!=0) { diff --git a/Strata/Languages/Python/Resolution.lean b/Strata/Languages/Python/Resolution.lean new file mode 100644 index 0000000000..1f4cc021b0 --- /dev/null +++ b/Strata/Languages/Python/Resolution.lean @@ -0,0 +1,1762 @@ +/- + Copyright Strata Contributors + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ +module + +public import Strata.Languages.Laurel.Laurel +public import Strata.Languages.Python.PythonDialect +import Strata.DDM.Util.SourceRange +import Strata.Languages.Python.ReadPython + +/-! +# Pass 1: Name Resolution + +Resolution is a fold over the Python AST that threads a growing context +as accumulator. Its job is to **disambiguate** what each AST node means +and attach the result as a `NodeInfo` annotation. The process of +disambiguation produces Laurel-ready identifiers and auxiliary data +(FuncSig, field lists) that Translation uses mechanically. + +**Input:** `Array (Python.stmt SourceRange)` (raw, unscoped) +**Output:** `ResolvedPythonProgram` (scoped, every node annotated with NodeInfo) + +The output AST is the scoping derivation for the Python program — +every node carries proof of what it refers to. 
+ +## Phase Distinction + +All Resolution types are purely Python-level. No `Laurel.Identifier` is +stored anywhere. Translation obtains Laurel identifiers by calling accessor +functions on the Python-level structures. This makes the phase boundary +explicit and prevents mixing. + +## What Resolution Does + +At the top level (module scope), each declaration extends the context: +- `def f(...)` → extends context, annotates FunctionDef with `.funcDecl sig` +- `class C` → extends context with class + methods, annotates with `.classDecl` +- `import M` → extends context internally (module tracked in Ctx only) +- `x : T = ...` → extends context with variable + +At each reference, Resolution annotates with the appropriate `NodeInfo`: +- Name use (variable/function/class) → `.variable name` +- Call (function) → `.funcCall sig` +- Call (class) → `.classNew className initSig` +- Call (method) → `.funcCall sig` (sig has `className = some _`) +- Attribute access → `.attribute name` (bare field name; Elaboration resolves based on receiver type) +- BinOp/Compare/UnaryOp → `.funcCall sig` (operator runtime procedure) +- Unresolvable → `.unresolved` +- Non-reference → `.irrelevant` + +## What Resolution Does NOT Do + +- Determine effects (Elaboration does that) +- Map PythonType → HighType (Translation does that) +- Emit Laurel constructs (Translation does that) +- Resolve field access to a class (Elaboration does that via synthesized receiver type) +-/ + +namespace Strata.Python.Resolution + +open Strata.Laurel + +public section + +/-! ## Core Types + +`PythonIdentifier` is a newtype with a private constructor. The only ways to +create one are from the AST (`.fromAst`), from an import path (`.fromImport`), +or for builtins (`.builtin`). This prevents fabrication of identifiers like +`"ClassName@method"` — all identifiers trace back to source or builtins.
/-- A Python expression annotated with raw source ranges (input to Resolution). -/
abbrev PythonExpr := Python.expr SourceRange
/-- A Python statement annotated with raw source ranges (input to Resolution). -/
abbrev PythonStmt := Python.stmt SourceRange
/-- An unresolved program: an array of statements. -/
abbrev PythonProgram := Array PythonStmt
/-- A Python type is represented by its annotation expression. -/
abbrev PythonType := PythonExpr
/-- A Python identifier with a private constructor. Can only be created via `.fromAst`,
    `.fromImport`, or `.builtin` — preventing fabrication of identifiers from arbitrary strings. -/
structure PythonIdentifier where
  private mk ::
  private val : String
  deriving BEq, Hashable, Inhabited, Repr

/-- Identifier taken verbatim from an annotated name in the AST. -/
def PythonIdentifier.fromAst (n : Ann String SourceRange) : PythonIdentifier :=
  ⟨n.val⟩

/-- Identifier for an imported module path: keeps only the segment before the
    first `"."` (e.g. `a.b.c` yields `a`). The `[]` branch falls back to the
    whole name. -/
def PythonIdentifier.fromImport (modName : Ann String SourceRange) : PythonIdentifier :=
  match modName.val.splitOn "." with
  | first :: _ => ⟨first⟩
  | [] => ⟨modName.val⟩

/-- Identifier for a builtin name.
    NOTE(review): this accepts any `String`, so the "no fabrication" guarantee
    relies on callers passing only genuine builtin names. -/
def PythonIdentifier.builtin (name : String) : PythonIdentifier :=
  ⟨name⟩

/-! ## Intermediate Types (mutually recursive)

These types are mutually recursive because `ParamList` stores resolved default
expressions (`Python.expr ResolvedAnn`) which depend on `ResolvedAnn` which
depends on `NodeInfo` which depends on `FuncSig` which depends on `ParamList`.

**FuncParams** distinguishes instance methods (with explicit receiver) from
static functions. The receiver is NOT in `ParamList` — it's separated so that
`matchArgs` can handle it correctly (receiver gets its own slot in the zip-fold).

**FuncSig** carries the Python-level function signature. `params` and `locals`
are private — Translation accesses them only via `matchArgs`, `laurelDeclInputs`,
and `laurelLocals` accessors.

**NodeInfo** is the output annotation on each AST node. Pattern matching on it
determines Translation's action. Complements:
- `funcDecl` / `funcCall` — declaration and use site of a function
- `classDecl` / `classNew` — declaration and instantiation site of a class
- `withCtx` — resolved `__enter__`/`__exit__` sigs on a with-item
- Operators are `funcCall` with correct arity (2 for binary, 1 for unary) -/

mutual

/-- The parameter list of a function/method, split into required, optional (with defaults),
    and keyword-only parameters. Defaults are resolved expressions (carry `ResolvedAnn`). -/
structure ParamList where
  /-- Parameters with no default value — must be provided at every call site. -/
  required : List (PythonIdentifier × PythonType)
  /-- Parameters with default values — may be omitted at call sites. -/
  optional : List (PythonIdentifier × PythonType × Python.expr ResolvedAnn)
  /-- Keyword-only parameters (after `*` in Python). Default is optional. -/
  kwonly : List (PythonIdentifier × PythonType × Option (Python.expr ResolvedAnn))

/-- Distinguishes instance methods (with explicit receiver) from static functions.
    The receiver is NOT in `ParamList` — it gets its own slot in `matchArgs`. -/
inductive FuncParams where
  /-- Instance method: first Python param is the receiver (typically `self`). -/
  | instance (receiver : PythonIdentifier) (params : ParamList)
  /-- Static function or top-level function: no receiver. -/
  | static (params : ParamList)

/-- The complete signature of a Python function or method. Carries everything Translation
    needs to emit a Laurel procedure declaration and match call-site arguments. -/
structure FuncSig where
  /-- The Python name of the function/method. -/
  name : PythonIdentifier
  /-- If this is a method, the class it belongs to. `none` for top-level functions. -/
  className : Option PythonIdentifier
  /-- Instance vs static params (receiver separated from ParamList). -/
  params : FuncParams
  /-- The declared return type annotation (defaults to Any if absent). -/
  returnType : PythonType
  /-- All local variables in the function body (computed by `computeLocals`). -/
  locals : List (PythonIdentifier × PythonType)
  /-- Overload index for disambiguated naming. `none` for non-overloaded functions. -/
  overloadIndex : Option Nat := none
  /-- The `**kwargs` parameter name, if present. A declared input (Any-typed) but not
      matched positionally by `matchArgs`. -/
  kwargName : Option PythonIdentifier := none

/-- The resolution annotation on each Python AST node.
    Each variant carries exactly what Translation needs to emit Laurel. -/
inductive NodeInfo where
  /-- A variable reference (local, param, or global). -/
  | variable (name : PythonIdentifier)
  /-- A function/method call site with the callee's full signature. -/
  | funcCall (sig : FuncSig)
  /-- A function/method declaration site with its signature. -/
  | funcDecl (sig : FuncSig)
  /-- A class instantiation (`ClassName(...)`) with class name and `__init__` sig. -/
  | classNew (className : PythonIdentifier) (initSig : FuncSig)
  /-- A class declaration with its fields and method signatures. -/
  | classDecl (name : PythonIdentifier) (attributes : List (PythonIdentifier × PythonType)) (methods : List FuncSig)
  /-- An attribute access (bare field name; Elaboration resolves via receiver type). -/
  | attribute (name : PythonIdentifier)
  /-- A `with` item with resolved `__enter__` and `__exit__` signatures. -/
  | withCtx (enterSig : FuncSig) (exitSig : FuncSig)
  /-- A reference that could not be resolved (unknown name/module). -/
  | unresolved
  /-- A non-reference node (literals, operators as nodes, etc.). -/
  | irrelevant

/-- The annotation type on resolved AST nodes: source range plus resolution info. -/
structure ResolvedAnn where
  /-- Original source location. -/
  sr : SourceRange
  /-- What Resolution determined about this node. -/
  info : NodeInfo

end

/-- A statement whose nodes carry `ResolvedAnn` (output of Resolution). -/
abbrev ResolvedPythonStmt := Python.stmt ResolvedAnn
/-- An expression whose nodes carry `ResolvedAnn` (output of Resolution). -/
abbrev ResolvedPythonExpr := Python.expr ResolvedAnn

-- Default instances. The default `FuncSig` is an unnamed static function with
-- no parameters or locals whose return type is the name `Any`.
instance : Inhabited ParamList where default := { required := [], optional := [], kwonly := [] }
instance : Inhabited FuncParams where default := .static default
instance : Inhabited FuncSig where default := { name := default, className := none, params := default, returnType := .Name SourceRange.none ⟨SourceRange.none, "Any"⟩ (.Load SourceRange.none), locals := [] }
instance : Inhabited NodeInfo where default := .irrelevant
instance : Inhabited ResolvedAnn where default := { sr := .none, info := .irrelevant }

/-- The output of Resolution: fully-annotated AST plus module-level local list. -/
structure ResolvedPythonProgram where
  /-- The resolved top-level statements. -/
  stmts : Array ResolvedPythonStmt
  /-- Module-level local variables (assignment targets at module scope). -/
  moduleLocals : List (PythonIdentifier × PythonType)
`self` typed by the enclosing class) +- Locals (Python's scoping rule: any assignment target in the body is function-local) +- FunctionDef/ClassDef names are NOT included in locals (they're declarations) -/ + +/-- An entry in Resolution's context. Determines what a `PythonIdentifier` key refers to. -/ +inductive CtxEntry where + /-- A function or method with its full signature. -/ + | function (sig : FuncSig) + /-- A class with its field list and method signatures. + `methods` holds eagerly-resolved sigs (user classes); `methodAsts` holds raw + method statements for lazy on-demand resolution (imported classes). -/ + | class_ (name : PythonIdentifier) (fields : List (PythonIdentifier × PythonType)) + (methods : List (PythonIdentifier × FuncSig)) + (methodAsts : List (PythonIdentifier × PythonStmt) := []) + /-- A variable with its type annotation. -/ + | variable (ty : PythonType) + /-- An overloaded function: signatures under the same name, matched in order. + Each carries its index, sig, and raw AST (for on-demand body resolution). -/ + | overloadedFunction (overloads : List (Nat × FuncSig × Option PythonStmt)) + /-- An imported module with its resolved context. -/ + | module_ (moduleCtx : Std.DHashMap.Raw PythonIdentifier (fun _ => CtxEntry)) + /-- An imported name whose type/kind is unknown. -/ + | unresolved + deriving Inhabited + +abbrev Ctx := Std.HashMap PythonIdentifier CtxEntry + +/-- An imported module with its source path (for cache filename) and resolved program. -/ +structure ImportedModule where + sourcePath : System.FilePath + program : ResolvedPythonProgram + +/-- State for the resolution monad: collects resolved imported module programs. -/ +structure ResolveState where + importedModules : Array ImportedModule := #[] + resolvedPaths : Std.HashMap String Ctx := {} + /-- Imported class methods resolved on demand (qualified name → resolved FunctionDef stmt). + The pipeline translates only these, not whole imported modules. 
/-- Turns an optional annotation expression into a `PythonType`.
    A present annotation is returned unchanged; an absent one becomes the bare
    name `Any` (built with `SourceRange.none` positions and a `Load` context). -/
def annotationToPythonType (ann : Option PythonExpr) : PythonType :=
  ann.getD (.Name SourceRange.none ⟨SourceRange.none, "Any"⟩ (.Load SourceRange.none))
-/ +partial def collectNamesFromTarget (target : PythonExpr) : List PythonIdentifier := + match target with + | .Name _ n _ => [PythonIdentifier.fromAst n] + | .Tuple _ elems _ => elems.val.toList.flatMap collectNamesFromTarget + | .List _ elems _ => elems.val.toList.flatMap collectNamesFromTarget + | .Starred _ inner _ => collectNamesFromTarget inner + | .Subscript _ _ _ _ => [] + | .Attribute _ _ _ _ => [] + | e => collectWalrusNames e + +/-- Recursively finds all walrus operator (`:=`) targets within an expression tree. -/ +partial def collectWalrusNames (expr : PythonExpr) : List PythonIdentifier := + match expr with + | .NamedExpr _ target _ => collectNamesFromTarget target + | .BinOp _ left _ right => collectWalrusNames left ++ collectWalrusNames right + | .BoolOp _ _ operands => operands.val.toList.flatMap collectWalrusNames + | .UnaryOp _ _ operand => collectWalrusNames operand + | .Compare _ left _ comparators => collectWalrusNames left ++ comparators.val.toList.flatMap collectWalrusNames + | .Call _ func args kwargs => + collectWalrusNames func ++ args.val.toList.flatMap collectWalrusNames ++ + kwargs.val.toList.flatMap fun kw => match kw with | .mk_keyword _ _ val => collectWalrusNames val + | .IfExp _ test body orelse => collectWalrusNames test ++ collectWalrusNames body ++ collectWalrusNames orelse + | .Dict _ keys vals => keys.val.toList.flatMap (fun k => match k with | .some_expr _ e => collectWalrusNames e | .missing_expr _ => []) ++ vals.val.toList.flatMap collectWalrusNames + | .Set _ elts => elts.val.toList.flatMap collectWalrusNames + | .ListComp _ elt generators => collectWalrusNames elt ++ collectWalrusFromComprehensions generators.val.toList + | .SetComp _ elt generators => collectWalrusNames elt ++ collectWalrusFromComprehensions generators.val.toList + | .DictComp _ key value generators => collectWalrusNames key ++ collectWalrusNames value ++ collectWalrusFromComprehensions generators.val.toList + | .GeneratorExp _ elt generators => 
/-- Collects the local-variable bindings introduced by one statement, in source order.

    Sources of bindings:
    - assignment targets (`=`, annotated `:`, augmented `op=`) and `type` alias names,
    - `for` / `async for` targets, `with … as` targets, `except … as` names,
    - names bound by `import` / `from … import` (the alias if given, otherwise the
      imported name),
    - walrus (`:=`) targets found in any expression position of the statement.

    Only an annotated assignment yields a real annotation; every other binding gets the
    type produced by `annotationToPythonType none`.

    Recurses into nested statement blocks (if/for/while/try/with/match bodies) but NOT
    into nested `FunctionDef` / `AsyncFunctionDef` / `ClassDef`, which contribute nothing
    here. `global` / `nonlocal` contribute nothing either. The result may contain
    duplicates; no deduplication happens in this function. -/
partial def collectLocalsFromStmt (s : PythonStmt) : List (PythonIdentifier × PythonType) :=
  -- Pair each name with the "no annotation" type.
  let untyped (names : List PythonIdentifier) : List (PythonIdentifier × PythonType) :=
    names.map fun n => (n, annotationToPythonType none)
  -- Walrus targets inside an expression.
  let walrusIn (e : PythonExpr) : List (PythonIdentifier × PythonType) :=
    untyped (collectWalrusNames e)
  -- Walrus targets inside an optional expression.
  let walrusInOpt (optExpr : Option PythonExpr) : List (PythonIdentifier × PythonType) :=
    match optExpr with
    | some e => walrusIn e
    | none => []
  -- Names bound by an assignment-like target expression.
  let boundBy (target : PythonExpr) : List (PythonIdentifier × PythonType) :=
    untyped (collectNamesFromTarget target)
  -- Locals of every statement in a nested block.
  let inBlock (stmts : List PythonStmt) : List (PythonIdentifier × PythonType) :=
    stmts.flatMap collectLocalsFromStmt
  match s with
  | .Assign _ targets value _ =>
    targets.val.toList.flatMap boundBy ++ walrusIn value
  | .AnnAssign _ target annotation valueOpt _ =>
    -- The only case where the declared annotation is attached to the bound names.
    (collectNamesFromTarget target).map (fun n => (n, annotation)) ++ walrusInOpt valueOpt.val
  | .AugAssign _ target _ value =>
    boundBy target ++ walrusIn value
  | .If _ test bodyStmts elseStmts =>
    walrusIn test ++ inBlock bodyStmts.val.toList ++ inBlock elseStmts.val.toList
  | .For _ target iter bodyStmts orelse _
  | .AsyncFor _ target iter bodyStmts orelse _ =>
    boundBy target ++ walrusIn iter ++
      inBlock bodyStmts.val.toList ++ inBlock orelse.val.toList
  | .While _ cond bodyStmts orelse =>
    walrusIn cond ++ inBlock bodyStmts.val.toList ++ inBlock orelse.val.toList
  | .Try _ bodyStmts handlers orelse finalbody
  | .TryStar _ bodyStmts handlers orelse finalbody =>
    let handlerLocals := handlers.val.toList.flatMap fun h =>
      match h with
      | .ExceptHandler _ _ maybeName handlerBody =>
        -- `except E as name:` binds `name`.
        let errorVar := match maybeName.val with
          | some n => [(PythonIdentifier.fromAst n, annotationToPythonType none)]
          | none => []
        errorVar ++ inBlock handlerBody.val.toList
    inBlock bodyStmts.val.toList ++ handlerLocals ++
      inBlock orelse.val.toList ++ inBlock finalbody.val.toList
  | .With _ items bodyStmts _
  | .AsyncWith _ items bodyStmts _ =>
    let itemLocals := items.val.toList.flatMap fun item =>
      match item with
      | .mk_withitem _ ctxExpr optVars =>
        let ctxWalrus := walrusIn ctxExpr
        let varNames := match optVars.val with
          | some varExpr => boundBy varExpr
          | none => []
        ctxWalrus ++ varNames
    itemLocals ++ inBlock bodyStmts.val.toList
  | .Match _ subject cases =>
    let caseLocals := cases.val.toList.flatMap fun c =>
      match c with
      | .mk_match_case _ _pattern guardOpt caseBody =>
        -- TODO: extract pattern bindings from _pattern (requires walking Python.pattern)
        walrusInOpt guardOpt.val ++ inBlock caseBody.val.toList
    walrusIn subject ++ caseLocals
  -- Nested definitions: not descended into, and the defined name is not recorded here.
  | .FunctionDef _ _ _ _ _ _ _ _ => []
  | .AsyncFunctionDef _ _ _ _ _ _ _ _ => []
  | .ClassDef _ _ _ _ _ _ _ => []
  | .Return _ valOpt => walrusInOpt valOpt.val
  | .Delete _ targets => targets.val.toList.flatMap walrusIn
  | .Raise _ excOpt causeOpt => walrusInOpt excOpt.val ++ walrusInOpt causeOpt.val
  | .Assert _ test msgOpt => walrusIn test ++ walrusInOpt msgOpt.val
  | .Pass _ => []
  | .Break _ => []
  | .Continue _ => []
  | .Import _ aliases =>
    -- Every alias binds exactly one name, so this is a plain `map`.
    aliases.val.toList.map fun alias =>
      match alias with
      | .mk_alias _ modName asName =>
        let id := match asName.val with
          | some aliasName => PythonIdentifier.fromAst aliasName
          | none => PythonIdentifier.fromImport modName
        (id, annotationToPythonType none)
  | .ImportFrom _ _ imports _ =>
    imports.val.toList.map fun imp =>
      match imp with
      | .mk_alias _ impName asName =>
        let id := match asName.val with
          | some aliasName => PythonIdentifier.fromAst aliasName
          | none => PythonIdentifier.fromAst impName
        (id, annotationToPythonType none)
  | .Global _ _ => []
  | .Nonlocal _ _ => []
  | .Expr _ value => walrusIn value
  | .TypeAlias _ nameExpr _ _ => boundBy nameExpr
/-- Python scoping: any assignment target in a function body is local to that function.

    Gathers every binding reported by `collectLocalsFromStmt` over `body`, drops the
    names in `paramNames` and every name declared `global` / `nonlocal` anywhere in the
    body (via `collectGlobalNonlocalNames`), and deduplicates by name keeping the FIRST
    occurrence — so the type paired with a name is the one from its first binding site.
    The result preserves first-occurrence order.

    Used by `extractFuncSig` to populate `FuncSig.locals`. -/
def computeLocals (body : PythonProgram) (paramNames : List PythonIdentifier)
    : List (PythonIdentifier × PythonType) :=
  let candidates := body.toList.flatMap collectLocalsFromStmt
  let globalNonlocal := body.toList.flatMap collectGlobalNonlocalNames
  -- Seeding `seen` with the excluded names makes the dedup pass filter them as well.
  let excluded : Std.HashSet PythonIdentifier :=
    (paramNames ++ globalNonlocal).foldl (fun s n => s.insert n) {}
  -- Accumulate in reverse (cons) and flip once at the end: appending to the tail on
  -- every step would make the pass quadratic in the number of locals.
  let (_, revLocals) :=
    candidates.foldl (init := (excluded, ([] : List (PythonIdentifier × PythonType))))
      fun (seen, acc) (name, ty) =>
        if seen.contains name then (seen, acc)
        else (seen.insert name, (name, ty) :: acc)
  revLocals.reverse
/-- Decides whether a call argument is compatible with a parameter type during
    overload resolution.

    Only a parameter type of the shape `Literal["value"]` (a subscript of the name
    `Literal` by a string constant) is restrictive: it accepts exactly a string-constant
    argument equal to that literal. Any other parameter type — including a subscript of
    some other name by a string constant — accepts every argument. -/
private def argMatchesParam (arg : PythonExpr) (paramTy : PythonType) : Bool :=
  match paramTy, arg with
  -- `Name["lit"]` parameter, string-constant argument: restrictive only for `Literal`.
  | .Subscript _ (.Name _ tName _) (.Constant _ (.ConString _ litVal) _) _,
    .Constant _ (.ConString _ argVal) _ =>
    tName.val != "Literal" || argVal == litVal
  -- `Name["lit"]` parameter, any other argument: rejected only for `Literal`.
  | .Subscript _ (.Name _ tName _) (.Constant _ (.ConString _ _) _) _, _ =>
    tName.val != "Literal"
  -- Everything else matches broadly.
  | _, _ => true
/-- Maps a bare Python function name to the name of its Laurel counterpart.

    Names listed in the table are rewritten to a fixed Laurel procedure name; any other
    name is returned unchanged (the final `other => other` arm). Note that `str` and
    `repr` both map to `to_string_any`, and `print` maps to itself.

    NOTE(review): the surrounding module documentation gives `timedelta` →
    `datetime_timedelta` as an example of module qualification, but this table maps
    `timedelta` to `timedelta_func`. Confirm which is intended. -/
def pythonNameToLaurel : String → String
  | "len" => "Any_len_to_Any"
  | "str" => "to_string_any"
  | "int" => "to_int_any"
  | "float" => "to_float_any"
  | "bool" => "Any_to_bool"
  | "abs" => "Any_abs_to_Any"
  | "print" => "print"
  | "repr" => "to_string_any"
  | "type" => "Any_type_to_Any"
  | "isinstance" => "Any_isinstance_to_bool"
  | "hasattr" => "Any_hasattr_to_bool"
  | "getattr" => "Any_getattr_to_Any"
  | "setattr" => "Any_setattr_to_Any"
  | "sorted" => "Any_sorted_to_Any"
  | "reversed" => "Any_reversed_to_Any"
  | "enumerate" => "Any_enumerate_to_Any"
  | "zip" => "Any_zip_to_Any"
  | "range" => "Any_range_to_Any"
  | "list" => "Any_list_to_Any"
  | "dict" => "Any_dict_to_Any"
  | "set" => "Any_set_to_Any"
  | "tuple" => "Any_tuple_to_Any"
  | "min" => "Any_min_to_Any"
  | "max" => "Any_max_to_Any"
  | "sum" => "Any_sum_to_Any"
  | "any" => "Any_any_to_bool"
  | "all" => "Any_all_to_bool"
  | "ord" => "Any_ord_to_Any"
  | "chr" => "Any_chr_to_Any"
  | "map" => "Any_map_to_Any"
  | "filter" => "Any_filter_to_Any"
  | "timedelta" => "timedelta_func"
  | other => other
/-- Zip-fold arg matching. Each param slot is filled in order:
    1. If a positional arg remains → consume it
    2. Else if a kwarg matches by name → use it
    3. Else if a default exists → translate it via `translateDefault`
    4. Else → panic (Resolution bug: required param without arg)

    Includes receiver slot for instance methods. Lives in Resolution
    because it accesses private `ParamList` fields and resolved defaults.

    Slot order is: receiver (instance methods only), required, optional, keyword-only.
    Because rule 1 is applied to every slot, a surplus positional argument will also
    fill a keyword-only slot. Positional arguments left over after all slots are filled,
    and keyword arguments whose name matches no slot, are dropped without an error.

    If the signature declares a `**kwargs` parameter (`sig.kwargName.isSome`), `mkKwargs`
    is run and its value, when `some`, is appended as the last element; with the default
    `mkKwargs := pure none` nothing is appended.

    The result has one element per slot, in slot order (plus the optional `**kwargs`
    value). -/
def FuncSig.matchArgs [Monad m] [Inhabited (m α)] (sig : FuncSig) (posArgs : List α) (kwargs : List (String × α))
    (translateDefault : ResolvedPythonExpr → m α) (mkKwargs : m (Option α) := pure none) : m (List α) := do
  -- The receiver slot has no default: it must come from the call's arguments.
  let (receiverSlot, pl) := match sig.params with
    | .instance recv pl => ([(recv.val, (none : Option ResolvedPythonExpr))], pl)
    | .static pl => ([], pl)
  -- Each slot is (parameter name, optional resolved default expression).
  let slots : List (String × Option ResolvedPythonExpr) :=
    receiverSlot ++
    pl.required.map (fun (id, _) => (id.val, none)) ++
    pl.optional.map (fun (id, _, dflt) => (id.val, some dflt)) ++
    pl.kwonly.map (fun (id, _, dflt) => (id.val, dflt))
  -- Fold state: (values chosen so far, positional args not yet consumed).
  let (result, _) ← slots.foldlM (fun (acc, pos) (pName, dflt) => do
    match pos with
    | a :: rest => pure (acc ++ [a], rest)
    | [] =>
      -- No positional arg left: look for a same-named kwarg, then fall back to the default.
      let v ← match kwargs.find? (fun (k, _) => k == pName) with
        | some (_, v) => pure v
        | none => match dflt with
          | some d => translateDefault d
          | none => panic! "Resolution bug: required param without arg"
      pure (acc ++ [v], [])
    ) ([], posArgs)
  -- Append a value for the `**kwargs` declared input, if present.
  if sig.kwargName.isSome then
    let kwOpt ← mkKwargs
    match kwOpt with
    | some kw => return (result ++ [kw])
    | none => return result
  else
    return result
/-- The starting context for Resolution: one `.function` entry per supported Python
    builtin, keyed by `PythonIdentifier.builtin <name>` and carrying a signature built
    by `mkBuiltinSig` (static, all parameters required). User declarations are added on
    top of this context. -/
def builtinContext : Ctx :=
  -- Builds one table row; the builtin's name is written once and used for both the
  -- context key and the signature.
  let entry (name : String) (params : List (String × PythonType)) (retTy : PythonType)
      : PythonIdentifier × CtxEntry :=
    (.builtin name, .function (mkBuiltinSig name params retTy))
  let table : List (PythonIdentifier × CtxEntry) := [
    entry "len" [("obj", anyType)] intType,
    entry "str" [("obj", anyType)] strType,
    entry "int" [("obj", anyType)] intType,
    entry "float" [("obj", anyType)] anyType,
    entry "bool" [("obj", anyType)] boolType,
    entry "print" [("obj", anyType)] anyType,
    entry "repr" [("obj", anyType)] strType,
    entry "type" [("obj", anyType)] anyType,
    entry "isinstance" [("obj", anyType), ("cls", anyType)] boolType,
    entry "hasattr" [("obj", anyType), ("name", strType)] boolType,
    entry "getattr" [("obj", anyType), ("name", strType)] anyType,
    entry "setattr" [("obj", anyType), ("name", strType), ("value", anyType)] anyType,
    entry "sorted" [("iterable", anyType)] anyType,
    entry "reversed" [("seq", anyType)] anyType,
    entry "enumerate" [("iterable", anyType)] anyType,
    entry "zip" [("a", anyType), ("b", anyType)] anyType,
    entry "range" [("stop", anyType)] anyType,
    entry "list" [("iterable", anyType)] anyType,
    entry "dict" [("iterable", anyType)] anyType,
    entry "set" [("iterable", anyType)] anyType,
    entry "tuple" [("iterable", anyType)] anyType,
    entry "min" [("a", anyType), ("b", anyType)] anyType,
    entry "max" [("a", anyType), ("b", anyType)] anyType,
    entry "sum" [("iterable", anyType)] anyType,
    entry "any" [("iterable", anyType)] boolType,
    entry "all" [("iterable", anyType)] boolType,
    entry "abs" [("x", anyType)] anyType,
    entry "ord" [("c", strType)] intType,
    entry "chr" [("i", intType)] strType,
    entry "map" [("func", anyType), ("iterable", anyType)] anyType,
    entry "filter" [("func", anyType), ("iterable", anyType)] anyType
  ]
  table.foldl (init := ({} : Ctx)) fun acc (key, info) => acc.insert key info
/-- Builds a `ParamList` from a Python `arguments` node.

    Positional-only and regular parameters are concatenated; the last
    `defaults.size` of them become `optional` (each paired with its default), the rest
    are `required`. Keyword-only parameters are paired positionally with `kwDefaults`,
    where a missing default is recorded as `none`. Default expressions are resolved
    with `resolveExpr`, in source order (positional defaults first, then keyword-only).

    `*args` and `**kwargs` are not part of the returned list. -/
partial def extractParamList (ctx : Ctx) (f : SourceRange → ResolvedAnn) (args : Python.arguments SourceRange) : ResolveM ParamList := do
  match args with
  | .mk_arguments _ posonlyargs argList _ kwonlyargs kwDefaults _ defaults =>
    let positional := (posonlyargs.val.toList ++ argList.val.toList).map argToParam
    -- Defaults align with the tail of the positional parameters.
    let numRequired := positional.length - defaults.val.size
    let required := positional.take numRequired
    let optional : List (PythonIdentifier × PythonType × ResolvedPythonExpr) ←
      ((positional.drop numRequired).zip defaults.val.toList).mapM fun ((n, ty), dflt) => do
        return (n, ty, ← resolveExpr ctx f dflt)
    let kwonly : List (PythonIdentifier × PythonType × Option ResolvedPythonExpr) ←
      ((kwonlyargs.val.toList.map argToParam).zip kwDefaults.val.toList).mapM
        fun ((n, ty), optExpr) => do
          match optExpr with
          | .some_expr _ e => return (n, ty, some (← resolveExpr ctx f e))
          | .missing_expr _ => return (n, ty, none)
    return { required, optional, kwonly }
-/ +partial def extractFuncSig (ctx : Ctx) (f : SourceRange → ResolvedAnn) + (pythonName : PythonIdentifier) (className : Option PythonIdentifier) + (args : Python.arguments SourceRange) (decorators : Array PythonExpr) + (returns : Ann (Option PythonExpr) SourceRange) + (body : PythonProgram) : ResolveM FuncSig := do + let paramList ← extractParamList ctx f args + let retTy := annotationToPythonType returns.val + let allParamNames := extractAllParamNames args + let locals := computeLocals body allParamNames + let funcParams := + if className.isNone || hasStaticmethodDecorator decorators then + .static paramList + else match paramList.required with + | (recv, _) :: rest => .instance recv { paramList with required := rest } + | [] => .static paramList + let kwargName := match args with + | .mk_arguments _ _ _ _ _ _ kwarg _ => match kwarg.val with + | some (.mk_arg _ n _ _) => some (PythonIdentifier.fromAst n) + | none => none + return { name := pythonName, className, params := funcParams, returnType := retTy, locals, kwargName } + +/-- Builds the body context for resolving statements inside a function. Extends ctx with + all params (including vararg/kwarg) and locals. Used by `resolveFuncDef` to create the + scope in which the function body is resolved. 
-/
partial def resolveFunctionBody (ctx : Ctx) (f : SourceRange → ResolvedAnn) (args : Python.arguments SourceRange) (body : PythonProgram) : ResolveM Ctx := do
  let params ← extractParamList ctx f args
  -- Named parameters, with any default expression stripped off.
  let named :=
    params.required ++ params.optional.map (fun (n, ty, _) => (n, ty)) ++ params.kwonly.map (fun (n, ty, _) => (n, ty))
  -- `*args` first, then `**kwargs`, each present only when declared.
  let starred : List (PythonIdentifier × PythonType) := match args with
    | .mk_arguments _ _ _ vararg _ _ kwarg _ =>
      (vararg.val.toList ++ kwarg.val.toList).map argToParam
  let locals := computeLocals body (extractAllParamNames args)
  -- Insertion order is: named parameters, starred parameters, then locals (later entries win).
  let withParams := (named ++ starred).foldl (fun c (n, ty) => c.insert n (CtxEntry.variable ty)) ctx
  return locals.foldl (fun c (n, ty) => c.insert n (CtxEntry.variable ty)) withParams

/-- Re-annotates an expression context (`Load` / `Store` / `Del`) with `f`. -/
partial def resolveExprCtx (f : SourceRange → ResolvedAnn) : Python.expr_context SourceRange → Python.expr_context ResolvedAnn
  | .Load ann => .Load (f ann)
  | .Store ann => .Store (f ann)
  | .Del ann => .Del (f ann)

/-- Re-annotates a constant node with `f`; payloads are carried over through `mapAnnVal`. -/
partial def resolveConstant (f : SourceRange → ResolvedAnn) : Python.constant SourceRange → Python.constant ResolvedAnn
  | .ConTrue ann => .ConTrue (f ann)
  | .ConFalse ann => .ConFalse (f ann)
  | .ConPos ann n => .ConPos (f ann) (mapAnnVal f n)
  | .ConNeg ann n => .ConNeg (f ann) (mapAnnVal f n)
  | .ConString ann s => .ConString (f ann) (mapAnnVal f s)
  | .ConFloat ann s => .ConFloat (f ann) (mapAnnVal f s)
  | .ConComplex ann re im => .ConComplex (f ann) (mapAnnVal f re) (mapAnnVal f im)
  | .ConNone ann => .ConNone (f ann)
  | .ConEllipsis ann => .ConEllipsis (f ann)
  | .ConBytes ann b => .ConBytes (f ann) (mapAnnVal f b)

/-- Re-annotates an integer node (`IntPos` / `IntNeg`) with `f`. -/
partial def resolveInt (f : SourceRange → ResolvedAnn) : Python.int SourceRange → Python.int ResolvedAnn
  | .IntPos ann n => .IntPos (f ann) (mapAnnVal f n)
  | .IntNeg ann n => .IntNeg (f ann) (mapAnnVal f n)

partial def
resolveOperator (f : SourceRange → ResolvedAnn) : Python.operator SourceRange → Python.operator ResolvedAnn
  -- Pure re-annotation: every binary operator keeps its constructor, only the annotation changes.
  | .Add a => .Add (f a) | .Sub a => .Sub (f a) | .Mult a => .Mult (f a) | .Div a => .Div (f a)
  | .FloorDiv a => .FloorDiv (f a) | .Mod a => .Mod (f a) | .Pow a => .Pow (f a)
  | .BitAnd a => .BitAnd (f a) | .BitOr a => .BitOr (f a) | .BitXor a => .BitXor (f a)
  | .LShift a => .LShift (f a) | .RShift a => .RShift (f a) | .MatMult a => .MatMult (f a)

/-- Re-annotates a boolean operator (`And` / `Or`) with `f`. -/
partial def resolveBoolop (f : SourceRange → ResolvedAnn) : Python.boolop SourceRange → Python.boolop ResolvedAnn
  | .And a => .And (f a) | .Or a => .Or (f a)

/-- Re-annotates a unary operator with `f`. -/
partial def resolveUnaryop (f : SourceRange → ResolvedAnn) : Python.unaryop SourceRange → Python.unaryop ResolvedAnn
  | .Not a => .Not (f a) | .USub a => .USub (f a) | .UAdd a => .UAdd (f a) | .Invert a => .Invert (f a)

/-- Re-annotates a comparison operator with `f`. -/
partial def resolveCmpop (f : SourceRange → ResolvedAnn) : Python.cmpop SourceRange → Python.cmpop ResolvedAnn
  | .Eq a => .Eq (f a) | .NotEq a => .NotEq (f a) | .Lt a => .Lt (f a) | .LtE a => .LtE (f a)
  | .Gt a => .Gt (f a) | .GtE a => .GtE (f a) | .Is a => .Is (f a) | .IsNot a => .IsNot (f a)
  | .In a => .In (f a) | .NotIn a => .NotIn (f a)

/-- Resolves an optional expression: a present expression is resolved with `resolveExpr`,
    a missing one is only re-annotated. -/
partial def resolveOptExpr (ctx : Ctx) (f : SourceRange → ResolvedAnn) : Python.opt_expr SourceRange → ResolveM (Python.opt_expr ResolvedAnn)
  | .some_expr a e => do return .some_expr (f a) (← resolveExpr ctx f e)
  | .missing_expr a => return .missing_expr (f a)

/-- Resolves a keyword argument: the keyword name is re-annotated, the value is resolved in `ctx`. -/
partial def resolveKeyword (ctx : Ctx) (f : SourceRange → ResolvedAnn) : Python.keyword SourceRange → ResolveM (Python.keyword ResolvedAnn)
  | .mk_keyword a arg val => do return .mk_keyword (f a) (mapAnnOpt f (mapAnnVal f) arg) (← resolveExpr ctx f val)

/-- Resolves a single parameter node; its type annotation, when present, is resolved in `ctx`. -/
partial def resolveArg (ctx : Ctx) (f : SourceRange → ResolvedAnn) : Python.arg SourceRange → ResolveM (Python.arg ResolvedAnn)
  | .mk_arg a name ann tc => do
    let rAnn ← match ann.val with
      | some e => pure (some (← resolveExpr ctx f e))
      | none => pure none
return .mk_arg (f a) (mapAnnVal f name) ⟨f ann.ann, rAnn⟩ (mapAnnOpt f (mapAnnVal f) tc)

/-- Resolves a whole `arguments` node in `ctx`. Sub-parts are processed in this order:
    positional-only parameters, regular parameters, `*args`, keyword-only parameters,
    keyword-only defaults, `**kwargs`, positional defaults. -/
partial def resolveArguments (ctx : Ctx) (f : SourceRange → ResolvedAnn) : Python.arguments SourceRange → ResolveM (Python.arguments ResolvedAnn)
  | .mk_arguments a posonlyargs args vararg kwonlyargs kwDefaults kwarg defaults => do
    let posOnly ← posonlyargs.val.mapM (resolveArg ctx f)
    let regular ← args.val.mapM (resolveArg ctx f)
    let star ← vararg.val.mapM (resolveArg ctx f)
    let kwOnly ← kwonlyargs.val.mapM (resolveArg ctx f)
    let kwOnlyDefaults ← kwDefaults.val.mapM (resolveOptExpr ctx f)
    let starStar ← kwarg.val.mapM (resolveArg ctx f)
    let posDefaults ← defaults.val.mapM (resolveExpr ctx f)
    return .mk_arguments (f a)
      ⟨f posonlyargs.ann, posOnly⟩
      ⟨f args.ann, regular⟩
      ⟨f vararg.ann, star⟩
      ⟨f kwonlyargs.ann, kwOnly⟩
      ⟨f kwDefaults.ann, kwOnlyDefaults⟩
      ⟨f kwarg.ann, starStar⟩
      ⟨f defaults.ann, posDefaults⟩

partial def resolveComprehension (ctx : Ctx) (f : SourceRange → ResolvedAnn) (comp : Python.comprehension SourceRange) : ResolveM (Ctx × Python.comprehension ResolvedAnn) := do
  match comp with
  | .mk_comprehension a target iter ifs isAsync =>
    let targetNames := collectNamesFromTarget target
    let compCtx := targetNames.foldl (fun c n => c.insert n (CtxEntry.variable (annotationToPythonType Option.none))) ctx
    let rTarget ← resolveExpr compCtx f target
    let rIter ← resolveExpr ctx f iter
let mut rIfs : Array ResolvedPythonExpr := #[]
    -- Filter conditions are resolved in the context that already contains the target names.
    for i in ifs.val do rIfs := rIfs.push (← resolveExpr compCtx f i)
    return (compCtx, .mk_comprehension (f a) rTarget rIter ⟨f ifs.ann, rIfs⟩ (resolveInt f isAsync))

/-- Resolves a list of comprehension generators left to right, threading the context so that
    each generator is resolved in the context returned by the previous one. Returns the final
    context together with the resolved generators in their original order. -/
partial def resolveComprehensions (ctx : Ctx) (f : SourceRange → ResolvedAnn) (comps : List (Python.comprehension SourceRange)) : ResolveM (Ctx × List (Python.comprehension ResolvedAnn)) := do
  let mut c := ctx
  let mut resolved : List (Python.comprehension ResolvedAnn) := []
  for comp in comps do
    let (c', r) ← resolveComprehension c f comp
    c := c'
    resolved := resolved ++ [r]
  return (c, resolved)

/-- Resolves a type parameter (`TypeVar`, `TypeVarTuple`, `ParamSpec`). The optional bound and
    default expressions are resolved in `ctx`; names are only re-annotated. -/
partial def resolveTypeParam (ctx : Ctx) (f : SourceRange → ResolvedAnn) : Python.type_param SourceRange → ResolveM (Python.type_param ResolvedAnn)
  | .TypeVar a name bound def_ => do
    let rBound ← match bound.val with
      | some e => pure (some (← resolveExpr ctx f e))
      | none => pure none
    let rDef ← match def_.val with
      | some e => pure (some (← resolveExpr ctx f e))
      | none => pure none
    return .TypeVar (f a) (mapAnnVal f name) ⟨f bound.ann, rBound⟩ ⟨f def_.ann, rDef⟩
  | .TypeVarTuple a name def_ => do
    let rDef ← match def_.val with
      | some e => pure (some (← resolveExpr ctx f e))
      | none => pure none
    return .TypeVarTuple (f a) (mapAnnVal f name) ⟨f def_.ann, rDef⟩
  | .ParamSpec a name def_ => do
    let rDef ← match def_.val with
      | some e => pure (some (← resolveExpr ctx f e))
      | none => pure none
    return .ParamSpec (f a) (mapAnnVal f name) ⟨f def_.ann, rDef⟩

/-- The core expression resolver.
Annotates each expression node with appropriate `NodeInfo`:
    - `.Name` → look up in ctx, annotate with `.variable`
    - `.Call` → determine callee (function/class/method), annotate with `.funcCall` or `.classNew`
    - `.Attribute` → annotate with `.attribute` (bare field name; Elaboration resolves via receiver type)
    - `.BinOp`/`.UnaryOp`/`.Compare`/`.BoolOp` → create operator FuncSig, annotate with `.funcCall`
    - Comprehensions → extend ctx with iteration variables before resolving element expression
    - `.Lambda` → extend ctx with every parameter (including `*args` / `**kwargs`) before
      resolving the body, mirroring what `resolveFunctionBody` does for `def` -/
partial def resolveExpr (ctx : Ctx) (f : SourceRange → ResolvedAnn) (e : PythonExpr) : ResolveM ResolvedPythonExpr := do
  match e with
  | .Name a n ectx =>
    let nId := PythonIdentifier.fromAst n
    -- Only variables get a `.variable` annotation; modules are `.irrelevant`, everything else
    -- (including names missing from ctx) is `.unresolved`.
    let info := match ctx[nId]? with
      | some (.variable _) => .variable nId
      | some (.function _) => .unresolved
      | some (.overloadedFunction _) => .unresolved
      | some (.class_ _ _ _ _) => .unresolved
      | some (.module_ _) => .irrelevant
      | some .unresolved => .unresolved
      | none => .unresolved
    return .Name { sr := a, info } (mapAnnVal f n) (resolveExprCtx f ectx)
  | .Call a func args kwargs =>
    -- The call annotation is decided from the unresolved callee before any sub-expression
    -- is resolved.
    let callInfo : NodeInfo ← match func with
      | .Name _ n _ =>
        let nId := PythonIdentifier.fromAst n
        match ctx[nId]? with
        | some (.function sig) => pure (.funcCall sig)
        | some (.overloadedFunction overloads) =>
          -- First overload accepted by `matchOverload` for the positional arguments wins.
          let matched := overloads.find? fun (_, olSig, _) =>
            matchOverload olSig args.val
          match matched with
          | some (idx, sig, astOpt) => do
            let sig' := { sig with overloadIndex := some idx }
            -- Overloads that carry a raw AST are resolved on demand.
            match astOpt with
            | some fAst => resolveFunctionAstSig ctx f sig' fAst
            | none => pure ()
            pure (.funcCall sig')
          | none => pure .unresolved
        | some (.class_ cId _ methods _) =>
          let initId := PythonIdentifier.builtin "__init__"
          match methods.find? (fun (mName, _) => mName == initId) with
          | some (_, sig) => pure (.classNew cId sig)
          | none =>
            -- No `__init__` in the method list: construct with an empty signature.
            let emptySig : FuncSig := { name := initId, className := some cId, params := .static {required := [], optional := [], kwonly := []}, returnType := anyType, locals := [] }
            pure (.classNew cId emptySig)
        | _ => pure .unresolved
      | .Attribute _ receiver methodName _ =>
        resolveMethodCall ctx receiver methodName args.val
      | _ => pure .unresolved
    let rFunc ← resolveExpr ctx f func
    let mut rArgs : Array ResolvedPythonExpr := #[]
    for arg in args.val do
      rArgs := rArgs.push (← resolveExpr ctx f arg)
    let mut rKwargs : Array (Python.keyword ResolvedAnn) := #[]
    for kw in kwargs.val do
      rKwargs := rKwargs.push (← resolveKeyword ctx f kw)
    return .Call { sr := a, info := callInfo } rFunc ⟨f args.ann, rArgs⟩ ⟨f kwargs.ann, rKwargs⟩
  | .Attribute a obj attr ectx =>
    let rObj ← resolveExpr ctx f obj
    -- A field access requires a value receiver. If the object is a module
    -- (.irrelevant) or unresolved, the attribute is not a field of a value
    -- (e.g. `sys.argv` is a module member); it resolves to .unresolved (→ hole).
    let info := match rObj.ann.info with
      | .irrelevant | .unresolved => .unresolved
      | _ => .attribute (PythonIdentifier.fromAst attr)
    return .Attribute { sr := a, info } rObj (mapAnnVal f attr) (resolveExprCtx f ectx)
  | .Constant a c tc => return .Constant (f a) (resolveConstant f c) (mapAnnOpt f (mapAnnVal f) tc)
  | .BinOp a left op right =>
    let opSig : FuncSig := { name := .builtin (operatorToLaurel op), className := none, params := .static {required := [(.builtin "left", anyType), (.builtin "right", anyType)], optional := [], kwonly := []}, returnType := anyType, locals := [] }
    let rLeft ← resolveExpr ctx f left
    let rRight ← resolveExpr ctx f right
    return .BinOp { sr := a, info := .funcCall opSig } rLeft (resolveOperator f op) rRight
  | .BoolOp a op operands =>
    let opSig : FuncSig := { name := .builtin (boolopToLaurel op), className := none, params := .static {required := [(.builtin "left", anyType), (.builtin "right", anyType)], optional := [], kwonly := []}, returnType := anyType, locals := [] }
    let mut rOperands : Array ResolvedPythonExpr := #[]
    for operand in operands.val do
      rOperands := rOperands.push (← resolveExpr ctx f operand)
    return .BoolOp { sr := a, info := .funcCall opSig } (resolveBoolop f op) ⟨f operands.ann, rOperands⟩
  | .UnaryOp a op operand =>
    let opSig : FuncSig := { name := .builtin (unaryopToLaurel op), className := none, params := .static {required := [(.builtin "operand", anyType)], optional := [], kwonly := []}, returnType := anyType, locals := [] }
    let rOperand ← resolveExpr ctx f operand
    return .UnaryOp { sr := a, info := .funcCall opSig } (resolveUnaryop f op) rOperand
  | .Compare a left ops comps =>
    -- The annotation's operator name comes from the first comparison operator only;
    -- an empty operator list falls back to "PEq".
    let opName := match ops.val[0]? with | some op => cmpopToLaurel op | none => "PEq"
    let opSig : FuncSig := { name := .builtin opName, className := none, params := .static {required := [(.builtin "left", anyType), (.builtin "right", anyType)], optional := [], kwonly := []}, returnType := anyType, locals := [] }
    let rLeft ← resolveExpr ctx f left
    let mut rComps : Array ResolvedPythonExpr := #[]
    for comp in comps.val do
      rComps := rComps.push (← resolveExpr ctx f comp)
    return .Compare { sr := a, info := .funcCall opSig } rLeft (mapAnnArr f (resolveCmpop f) ops) ⟨f comps.ann, rComps⟩
  | .IfExp a test body orelse =>
    let rTest ← resolveExpr ctx f test
    let rBody ← resolveExpr ctx f body
    let rElse ← resolveExpr ctx f orelse
    return .IfExp (f a) rTest rBody rElse
  | .Dict a keys vals =>
    let mut rKeys : Array (Python.opt_expr ResolvedAnn) := #[]
    for k in keys.val do
      rKeys := rKeys.push (← resolveOptExpr ctx f k)
    let mut rVals : Array ResolvedPythonExpr := #[]
    for v in vals.val do
      rVals := rVals.push (← resolveExpr ctx f v)
    return .Dict (f a) ⟨f keys.ann, rKeys⟩ ⟨f vals.ann, rVals⟩
  | .Set a elts =>
    let mut rElts : Array ResolvedPythonExpr := #[]
    for elt in elts.val do
      rElts := rElts.push (← resolveExpr ctx f elt)
    return .Set (f a) ⟨f elts.ann, rElts⟩
  -- Comprehension forms: generators are resolved first; the element expression(s) are then
  -- resolved in the context that contains all generator targets.
  | .ListComp a elt gens =>
    let (compCtx, resolvedGens) ← resolveComprehensions ctx f gens.val.toList
    let rElt ← resolveExpr compCtx f elt
    return .ListComp (f a) rElt ⟨f gens.ann, resolvedGens.toArray⟩
  | .SetComp a elt gens =>
    let (compCtx, resolvedGens) ← resolveComprehensions ctx f gens.val.toList
    let rElt ← resolveExpr compCtx f elt
    return .SetComp (f a) rElt ⟨f gens.ann, resolvedGens.toArray⟩
  | .DictComp a key val gens =>
    let (compCtx, resolvedGens) ← resolveComprehensions ctx f gens.val.toList
    let rKey ← resolveExpr compCtx f key
    let rVal ← resolveExpr compCtx f val
    return .DictComp (f a) rKey rVal ⟨f gens.ann, resolvedGens.toArray⟩
  | .GeneratorExp a elt gens =>
    let (compCtx, resolvedGens) ← resolveComprehensions ctx f gens.val.toList
    let rElt ← resolveExpr compCtx f elt
    return .GeneratorExp (f a) rElt ⟨f gens.ann, resolvedGens.toArray⟩
  | .Await a inner => return .Await (f a) (← resolveExpr ctx f inner)
  | .Yield a valOpt =>
    let rVal ← match valOpt.val with
      | some v => pure (some (← resolveExpr ctx f v))
      | none => pure none
    return .Yield (f a) ⟨f valOpt.ann, rVal⟩
  | .YieldFrom a inner => return .YieldFrom (f a) (← resolveExpr ctx f inner)
  | .FormattedValue a value conv fmt =>
    let rValue ← resolveExpr ctx f value
    let rFmt ← match fmt.val with
      | some fmtExpr => pure (some (← resolveExpr ctx f fmtExpr))
      | none => pure none
    return .FormattedValue (f a) rValue (resolveInt f conv) ⟨f fmt.ann, rFmt⟩
  | .JoinedStr a values =>
    let mut rValues : Array ResolvedPythonExpr := #[]
    for v in values.val do
      rValues := rValues.push (← resolveExpr ctx f v)
    return .JoinedStr (f a) ⟨f values.ann, rValues⟩
  | .Subscript a obj slice ectx =>
    let rObj ← resolveExpr ctx f obj
    let rSlice ← resolveExpr ctx f slice
    return .Subscript (f a) rObj rSlice (resolveExprCtx f ectx)
  | .Starred a inner ectx =>
    return .Starred (f a) (← resolveExpr ctx f inner) (resolveExprCtx f ectx)
  | .Tuple a elts ectx =>
    let mut rElts : Array ResolvedPythonExpr := #[]
    for elt in elts.val do
      rElts := rElts.push (← resolveExpr ctx f elt)
    return .Tuple (f a) ⟨f elts.ann, rElts⟩ (resolveExprCtx f ectx)
  | .List a elts ectx =>
    let mut rElts : Array ResolvedPythonExpr := #[]
    for elt in elts.val do
      rElts := rElts.push (← resolveExpr ctx f elt)
    return .List (f a) ⟨f elts.ann, rElts⟩ (resolveExprCtx f ectx)
  | .NamedExpr a target value =>
    let rTarget ← resolveExpr ctx f target
    let rValue ← resolveExpr ctx f value
    return .NamedExpr (f a) rTarget rValue
  | .Lambda a args body => do
    let pl ← extractParamList ctx f args
    let allParams := pl.required ++ pl.optional.map (fun (n, ty, _) => (n, ty)) ++ pl.kwonly.map (fun (n, ty, _) => (n, ty))
    -- `*args` / `**kwargs` are parameters of the lambda as well; `extractParamList` does not
    -- return them, so they are collected here the same way `resolveFunctionBody` does.
    -- Without this, uses of those names in the lambda body would be looked up in the
    -- enclosing context instead of being bound as variables.
    let varargKwarg : List (PythonIdentifier × PythonType) := match args with
      | .mk_arguments _ _ _ vararg _ _ kwarg _ =>
        let va := match vararg.val with | some p => [argToParam p] | none => []
        let kw := match kwarg.val with | some p => [argToParam p] | none => []
        va ++ kw
    let lambdaCtx := (allParams ++ varargKwarg).foldl (fun c (n, ty) => c.insert n (CtxEntry.variable ty)) ctx
    let rBody ← resolveExpr lambdaCtx f body
    let rArgs ← resolveArguments lambdaCtx f args
    return .Lambda (f a) rArgs rBody
  | .Slice a start stop step =>
    let rStart ← match start.val with
      | some e => pure (some (← resolveExpr ctx f e))
      | none => pure none
    let rStop ← match stop.val with
      | some e => pure (some (← resolveExpr ctx f e))
      | none => pure none
    let rStep ← match step.val with
      | some e => pure (some (← resolveExpr ctx f e))
      | none => pure none
    return .Slice (f a) ⟨f start.ann, rStart⟩ ⟨f stop.ann, rStop⟩ ⟨f step.ann, rStep⟩
  | .TemplateStr a parts =>
    let mut rParts : Array ResolvedPythonExpr := #[]
    for p in parts.val do
      rParts := rParts.push (← resolveExpr ctx f p)
    return .TemplateStr (f a) ⟨f parts.ann, rParts⟩
  | .Interpolation a value conv fmtSpec fmt => do
    let rValue ← resolveExpr ctx f value
    let rFmt ← match fmt.val with
      | some e => pure (some (← resolveExpr ctx f e))
      | none => pure none
    return .Interpolation (f a) rValue (resolveConstant f conv) (resolveInt f fmtSpec) ⟨f fmt.ann, rFmt⟩

/-- Re-annotates an import alias (`name` and optional `asname`) with `f`. -/
partial def resolveAlias (f : SourceRange → ResolvedAnn) : Python.alias SourceRange → Python.alias ResolvedAnn
  | .mk_alias a name asname => .mk_alias (f a) (mapAnnVal f name) (mapAnnOpt f (mapAnnVal f) asname)

/-- Resolves a `with` item: uses `typeOfExpr` on the context expression to find the class,
    then looks up `__enter__` and `__exit__` in its method list. Annotates with `.withCtx`
    carrying both sigs so Translation can emit `StaticCall enter [mgr]` / `StaticCall exit [mgr]`.
-/
partial def resolveWithitem (ctx : Ctx) (f : SourceRange → ResolvedAnn) : Python.withitem SourceRange → ResolveM (Python.withitem ResolvedAnn)
  | .mk_withitem a ctxExpr optVars => do
    let enterId := PythonIdentifier.builtin "__enter__"
    let exitId := PythonIdentifier.builtin "__exit__"
    -- `.withCtx` is produced only when the context expression's type is a plain class name
    -- found in ctx AND that class lists both `__enter__` and `__exit__`; otherwise `.unresolved`.
    let info ← match ← typeOfExpr ctx ctxExpr with
      | some (.Name _ className _) =>
        let classId := PythonIdentifier.fromAst className
        match ctx[classId]? with
        | some (.class_ _ _ methods _) =>
          let enterSig := methods.find? (fun (mName, _) => mName == enterId) |>.map (·.2)
          let exitSig := methods.find? (fun (mName, _) => mName == exitId) |>.map (·.2)
          match enterSig, exitSig with
          | some es, some xs => pure (NodeInfo.withCtx es xs)
          | _, _ => pure NodeInfo.unresolved
        | _ => pure NodeInfo.unresolved
      | _ => pure NodeInfo.unresolved
    let rCtxExpr ← resolveExpr ctx f ctxExpr
    -- The optional `as` target is resolved in the same ctx as the context expression.
    let rOptVars ← match optVars.val with
      | some v => pure (some (← resolveExpr ctx f v))
      | none => pure none
    return .mk_withitem { sr := a, info } rCtxExpr ⟨f optVars.ann, rOptVars⟩

/-- Resolves an `except` handler. The handler body is resolved in a context extended with the
    bound exception name (when there is one); the exception type expression is resolved in the
    unextended `ctx`. -/
partial def resolveExcepthandler (ctx : Ctx) (f : SourceRange → ResolvedAnn) : Python.excepthandler SourceRange → ResolveM (Python.excepthandler ResolvedAnn)
  | .ExceptHandler a ty name body => do
    let handlerCtx := match name.val with
      | some n => ctx.insert (PythonIdentifier.fromAst n) (CtxEntry.variable (annotationToPythonType Option.none))
      | none => ctx
    let resolvedBody ← resolveBlock handlerCtx f body.val
    let rTy ← match ty.val with
      | some e => pure (some (← resolveExpr ctx f e))
      | none => pure none
    return .ExceptHandler (f a) ⟨f ty.ann, rTy⟩ (mapAnnOpt f (mapAnnVal f) name) ⟨f body.ann, resolvedBody⟩

/-- Resolves one `case` of a `match` statement: the body and the optional guard are both
    resolved in `ctx`. -/
partial def resolveMatchCase (ctx : Ctx) (f : SourceRange → ResolvedAnn) : Python.match_case SourceRange → ResolveM (Python.match_case ResolvedAnn)
  | .mk_match_case a pat guard body => do
    let resolvedBody ← resolveBlock ctx f body.val
    let rGuard ← match guard.val with
      | some e =>
pure (some (← resolveExpr ctx f e))
      | none => pure none
    -- NOTE(review): the pattern argument below is `sorry` and the bound `pat` is never used,
    -- so the case pattern is not resolved here — TODO confirm a pattern resolver is still
    -- to be written before this path is exercised.
    return .mk_match_case (f a) (sorry) ⟨f guard.ann, rGuard⟩ ⟨f body.ann, resolvedBody⟩

/-- Resolves an array of statements sequentially, threading the growing context.
    Each statement may extend the context (e.g., assignments, imports, defs) which
    subsequent statements in the same block can see. -/
partial def resolveBlock (ctx : Ctx) (f : SourceRange → ResolvedAnn) (stmts : Array PythonStmt) : ResolveM (Array ResolvedPythonStmt) := do
  let mut c := ctx
  let mut resolved : Array ResolvedPythonStmt := #[]
  for stmt in stmts do
    let (c', r) ← resolveStmt c f stmt
    c := c'
    resolved := resolved.push r
  -- Only the resolved statements are returned; the final context is dropped.
  return resolved

/-- Resolves a function definition. Takes the pre-computed `FuncSig` (from the ClassDef handler
    or freshly extracted), extends the context with the function name, builds the body context,
    and resolves the body. Returns the updated ctx and all resolved sub-trees for the caller to
    assemble into `FunctionDef` or `AsyncFunctionDef`.
-/
partial def resolveFuncDef (ctx : Ctx) (f : SourceRange → ResolvedAnn)
    (sig : FuncSig)
    (a : SourceRange) (name : Ann String SourceRange) (args : Python.arguments SourceRange)
    (body : Ann PythonProgram SourceRange) (decorators : Ann (Array PythonExpr) SourceRange)
    (returns : Ann (Option PythonExpr) SourceRange) (tc : Ann (Option (Ann String SourceRange)) SourceRange)
    (typeParams : Ann (Array (Python.type_param SourceRange)) SourceRange) := do
  -- The function's own name is registered first, so the body context contains it.
  let ctx' := ctx.insert (PythonIdentifier.fromAst name) (.function sig)
  let bodyCtx ← resolveFunctionBody ctx' f args body.val
  let ann : ResolvedAnn := { sr := a, info := .funcDecl sig }
  let resolvedBody ← resolveBlock bodyCtx f body.val
  let rBody : Ann (Array ResolvedPythonStmt) ResolvedAnn := ⟨f body.ann, resolvedBody⟩
  -- Parameter annotations and defaults are resolved in the body context (parameters and
  -- locals visible); decorators, the return annotation and type parameters use `ctx'`.
  let rArgs ← resolveArguments bodyCtx f args
  let mut rDecs : Array ResolvedPythonExpr := #[]
  for d in decorators.val do rDecs := rDecs.push (← resolveExpr ctx' f d)
  let rRets ← match returns.val with
    | some e => pure (some (← resolveExpr ctx' f e))
    | none => pure none
  let mut rTps : Array (Python.type_param ResolvedAnn) := #[]
  for tp in typeParams.val do rTps := rTps.push (← resolveTypeParam ctx' f tp)
  let rDecsAnn : Ann (Array ResolvedPythonExpr) ResolvedAnn := ⟨f decorators.ann, rDecs⟩
  let rRetsAnn : Ann (Option ResolvedPythonExpr) ResolvedAnn := ⟨f returns.ann, rRets⟩
  let rTpsAnn : Ann (Array (Python.type_param ResolvedAnn)) ResolvedAnn := ⟨f typeParams.ann, rTps⟩
  -- The returned context is `ctx'` (outer scope plus this function), not the body context.
  return (ctx', ann, mapAnnVal f name, rArgs, rBody, rDecsAnn, rRetsAnn, mapAnnOpt f (mapAnnVal f) tc, rTpsAnn)

/-- Spine type resolution. Monadic: may trigger demand-driven module loads when
    traversing qualified type annotations (e.g. `boto3.S3`) through module contexts. -/
partial def typeOfExpr (ctx : Ctx) : PythonExpr → ResolveM (Option PythonType)
  | .Name _ n _ => match ctx[PythonIdentifier.fromAst n]?
with
    -- A bare name has a type only when ctx binds it as a variable.
    | some (.variable ty) => pure (some ty)
    | _ => pure none
  | .Attribute _ obj fieldName _ => do
    -- Only receivers whose type is a plain name are followed; that name is then looked up
    -- in ctx and may denote either a class or a module.
    match ← typeOfExpr ctx obj with
    | some (.Name _ className _) =>
      let classId := PythonIdentifier.fromAst className
      match ctx[classId]? with
      | some (.class_ _ fields _ _) =>
        -- Class: the attribute's type is the declared type of the field with that name.
        pure (fields.find? (fun (fName, _) => fName == PythonIdentifier.fromAst fieldName) |>.map (·.2))
      | some (.module_ moduleRaw) =>
        -- Module: rebuild a Ctx from the stored entries and look the attribute up there.
        let moduleCtx : Ctx := moduleRaw.fold (fun c k v => c.insert k v) {}
        let fieldId := PythonIdentifier.fromAst fieldName
        match moduleCtx[fieldId]? with
        | some (.variable ty) => pure (some ty)
        | some (.class_ _ fields _ _) =>
          -- NOTE(review): this searches the class's fields for a field named like the
          -- attribute itself (`fieldId`) — confirm that is the intended lookup.
          pure (fields.find? (fun (fName, _) => fName == fieldId) |>.map (·.2))
        | none =>
          -- Not present in the module's entries: load the sub-module from disk, using the
          -- dotted type name as directory components below the base directory.
          let baseDir ← read
          let components := className.val.splitOn "."
          let moduleDir := components.foldl (· / ·) baseDir
          let f : SourceRange → ResolvedAnn := fun sr => { sr, info := .irrelevant }
          let (subCtx, _) ← resolveModuleComponent fieldName.val moduleDir f
          match subCtx[fieldId]? with
          | some (.variable ty) => pure (some ty)
          | _ => pure none
        | _ => pure none
      | _ => pure none
    | _ => pure none
  -- Every other expression form has no spine type.
  | _ => pure none

/-- Resolve one imported class method from its raw AST on demand. Extracts the
    FuncSig, resolves the method body, records the resolved FunctionDef into
    `demandedMethods` and the owning class into `demandedClasses` for the
    pipeline to translate. Memoized by qualified name.
-/
partial def resolveMethodAstSig (ctx : Ctx) (f : SourceRange → ResolvedAnn)
    (classId : PythonIdentifier) (fields : List (PythonIdentifier × PythonType))
    (mAst : PythonStmt) : ResolveM FuncSig := do
  match mAst with
  | .FunctionDef a mName mArgs body mDecs mReturns mTc mTypeParams =>
    let mId := PythonIdentifier.fromAst mName
    -- Memoization key: "<class>@<method>".
    let qualName := s!"{classId.val}@{mName.val}"
    -- The signature is extracted on every call (it is the return value); only the body
    -- resolution below is skipped when the method was already demanded.
    let sig ← extractFuncSig ctx f mId (some classId) mArgs mDecs.val mReturns body.val
    let st ← get
    unless st.demandedMethods.contains qualName do
      let (_, ann, rName, rArgs, rBody, rDecs, rRets, rTc, rTps) ←
        resolveFuncDef ctx f sig a mName mArgs body mDecs mReturns mTc mTypeParams
      let resolvedStmt : ResolvedPythonStmt := .FunctionDef ann rName rArgs rBody rDecs rRets rTc rTps
      -- NOTE(review): the memo entry is written only after the body has been resolved, so a
      -- body that demands this same method again would re-enter here — confirm this cannot
      -- happen for the inputs this pass sees.
      modify fun s => { s with
        demandedMethods := s.demandedMethods.insert qualName resolvedStmt
        demandedClasses := s.demandedClasses.insert classId.val (classId, fields) }
    pure sig
  | _ =>
    -- Any statement other than `FunctionDef` yields a placeholder signature named "?" and
    -- records nothing.
    pure { name := PythonIdentifier.builtin "?", className := some classId, params := .static {required := [], optional := [], kwonly := []}, returnType := anyType, locals := [] }

/-- Resolve one imported top-level function / overload from its raw AST on demand.
    Records the resolved FunctionDef into `demandedFunctions` under its
    disambiguated Laurel name. Memoized.
-/
partial def resolveFunctionAstSig (ctx : Ctx) (f : SourceRange → ResolvedAnn)
    (sig : FuncSig) (fAst : PythonStmt) : ResolveM Unit := do
  match fAst with
  | .FunctionDef a fName fArgs body fDecs fReturns fTc fTypeParams =>
    -- Memoization key: the text of the signature's Laurel name.
    let key := sig.laurelName.text
    let st ← get
    unless st.demandedFunctions.contains key do
      let (_, ann, rName, rArgs, rBody, rDecs, rRets, rTc, rTps) ←
        resolveFuncDef ctx f sig a fName fArgs body fDecs fReturns fTc fTypeParams
      -- Re-annotate the FunctionDef with the disambiguated sig so Translation emits client$N
      let ann' : ResolvedAnn := { ann with info := .funcDecl sig }
      let resolvedStmt : ResolvedPythonStmt := .FunctionDef ann' rName rArgs rBody rDecs rRets rTc rTps
      modify fun s => { s with demandedFunctions := s.demandedFunctions.insert key resolvedStmt }
  -- Statements other than `FunctionDef` are ignored.
  | _ => pure ()

/-- Resolves `receiver.method(...)` calls. Monadic: uses `typeOfExpr` which may
    trigger demand-driven module loads. -/
partial def resolveMethodCall (ctx : Ctx) (receiver : PythonExpr) (methodName : Ann String SourceRange) (callArgs : Array PythonExpr := #[]) : ResolveM NodeInfo := do
  let methId := PythonIdentifier.fromAst methodName
  -- Annotation function used for everything resolved on demand from here.
  let f : SourceRange → ResolvedAnn := fun sr => { sr, info := .irrelevant }
  match ← typeOfExpr ctx receiver with
  | some (.Name _ className _) =>
    -- Receiver typed by a plain class name: look in the class's resolved methods first,
    -- then in its raw method ASTs (resolved on demand).
    let classId := PythonIdentifier.fromAst className
    match ctx[classId]? with
    | some (.class_ classId fields methods methodAsts) =>
      match methods.find? (fun (mName, _) => mName == methId) with
      | some (_, sig) => pure (.funcCall sig)
      | none => match methodAsts.find? (fun (mName, _) => mName == methId) with
        | some (_, mAst) => do
          let sig ← resolveMethodAstSig ctx f classId fields mAst
          pure (.funcCall sig)
        | none => pure .unresolved
    | _ => pure .unresolved
  | some (.Attribute _ (.Name _ modName _) clsName _) =>
    -- Qualified class type (e.g. boto3.S3): chase module → class, resolve method on demand
    let modId := PythonIdentifier.fromAst modName
    let baseDir ← read
    -- Load the submodule `mod/cls` (e.g. boto3/S3) to find the class.
    let (subCtx, _) ← resolveModuleComponent clsName.val (baseDir / modName.val) f
    match subCtx[PythonIdentifier.fromAst clsName]? with
    | some (.class_ classId fields methods methodAsts) =>
      match methods.find? (fun (mName, _) => mName == methId) with
      | some (_, sig) => pure (.funcCall sig)
      | none => match methodAsts.find? (fun (mName, _) => mName == methId) with
        | some (_, mAst) => do
          -- The method is resolved in the submodule's context, not the caller's.
          let sig ← resolveMethodAstSig subCtx f classId fields mAst
          pure (.funcCall sig)
        | none => pure .unresolved
    | _ =>
      -- Fall back: maybe the name is a class directly in the parent module's ctx
      match ctx[modId]? with
      | some (.module_ moduleRaw) =>
        let moduleCtx : Ctx := moduleRaw.fold (fun c k v => c.insert k v) {}
        match moduleCtx[PythonIdentifier.fromAst clsName]? with
        | some (.class_ classId fields methods methodAsts) =>
          match methods.find? (fun (mName, _) => mName == methId) with
          | some (_, sig) => pure (.funcCall sig)
          | none => match methodAsts.find? (fun (mName, _) => mName == methId) with
            | some (_, mAst) => do
              let sig ← resolveMethodAstSig moduleCtx f classId fields mAst
              pure (.funcCall sig)
            | none => pure .unresolved
        | _ => pure .unresolved
      | _ => pure .unresolved
  -- No usable receiver type: treat `name.member(...)` as a call into module `name`.
  | _ => match receiver with
    | .Name _ rName _ =>
      let rId := PythonIdentifier.fromAst rName
      match ctx[rId]? with
      | some (.module_ moduleRaw) =>
        let moduleCtx : Ctx := moduleRaw.fold (fun c k v => c.insert k v) {}
        match moduleCtx[methId]? with
        | some (.function sig) => pure (.funcCall sig)
        | some (.overloadedFunction overloads) =>
          -- First overload accepted by `matchOverload` for the call arguments wins.
          let matched := overloads.find? fun (_, olSig, _) =>
            matchOverload olSig callArgs
          match matched with
          | some (idx, sig, astOpt) => do
            let sig' := { sig with overloadIndex := some idx }
            match astOpt with
            | some fAst => resolveFunctionAstSig moduleCtx f sig' fAst
            | none => pure ()
            pure (.funcCall sig')
          | none => pure .unresolved
        | some (.class_ cId fields methods methodAsts) =>
          -- `module.Class(...)` is a constructor call: use `__init__` from the resolved
          -- methods, else from the raw ASTs, else an empty signature.
          let initId := PythonIdentifier.builtin "__init__"
          match methods.find? (fun (mName, _) => mName == initId) with
          | some (_, sig) => pure (.classNew cId sig)
          | none => match methodAsts.find? (fun (mName, _) => mName == initId) with
            | some (_, mAst) => do
              let sig ← resolveMethodAstSig moduleCtx f cId fields mAst
              pure (.classNew cId sig)
            | none =>
              let emptySig : FuncSig := { name := initId, className := some cId, params := .static {required := [], optional := [], kwonly := []}, returnType := anyType, locals := [] }
              pure (.classNew cId emptySig)
        | _ => pure .unresolved
      | _ => pure .unresolved
    | _ => pure .unresolved

/-- Load a module component from disk and resolve it. Tries `dir/name.python.st.ion`
    then `dir/name/__init__.python.st.ion`. Returns the module's resolved program and Ctx. -/
partial def resolveModuleComponent (name : String) (dir : System.FilePath) (f : SourceRange → ResolvedAnn) : ResolveM (Ctx × ResolvedPythonProgram) := do
  let ionPath := dir / (name ++ ".python.st.ion")
  let initPath := dir / name / "__init__.python.st.ion"
  -- The cache key is always the `name.python.st.ion` path, even when the module is
  -- eventually loaded from the `__init__` file.
  let key := ionPath.toString
  let state ← get
  if let some cachedCtx := state.resolvedPaths[key]?
then
    -- Cache hit: return the stored context with an empty program.
    return (cachedCtx, { stmts := #[], moduleLocals := [] })
  let loadResult ← do
    match ← (Python.readPythonStrata ionPath.toString).toBaseIO with
    | .ok stmts => pure (some (ionPath, stmts))
    | .error _ =>
      -- Read errors are not reported; the `__init__` file is tried next.
      match ← (Python.readPythonStrata initPath.toString).toBaseIO with
      | .ok stmts => pure (some (initPath, stmts))
      | .error _ => pure none
  match loadResult with
  | some (_, stmts) =>
    -- Index-only scan: top-level functions resolved eagerly (few, needed for overload
    -- matching); class methods stored as raw ASTs for on-demand resolution; TypedDicts
    -- and other assignments skipped. Avoids folding over thousands of irrelevant stmts.
    let mut ctx : Ctx := builtinContext
    for stmt in stmts do
      match stmt with
      | .FunctionDef _ fname fargs fbody fdecs freturns _ _ =>
        let nameId := PythonIdentifier.fromAst fname
        if hasOverloadDecorator fdecs.val then
          -- Append this overload (with its raw AST) to the overloads collected so far.
          let overloads := match ctx[nameId]? with
            | some (.overloadedFunction existing) => existing
            | _ => []
          let idx := overloads.length
          let sig ← extractFuncSig ctx f nameId none fargs fdecs.val freturns fbody.val
          ctx := ctx.insert nameId (.overloadedFunction (overloads ++ [(idx, sig, some stmt)]))
        else
          match ctx[nameId]? with
          | some (.overloadedFunction _) => pure () -- impl stub after overloads, keep overloads
          | _ =>
            let sig ← extractFuncSig ctx f nameId none fargs fdecs.val freturns fbody.val
            ctx := ctx.insert nameId (.function sig)
      | .ClassDef _ cname _ _ cbody _ _ =>
        let classId := PythonIdentifier.fromAst cname
        -- Fields: annotated assignments whose target is a plain name.
        let fields := cbody.val.toList.filterMap fun s => match s with
          | .AnnAssign _ (.Name _ n _) annotation _ _ => some (PythonIdentifier.fromAst n, annotation)
          | _ => none
        -- Methods are kept as raw statements (sync and async defs); the resolved method
        -- list of the class entry is left empty.
        let methodAsts := cbody.val.toList.filterMap fun s => match s with
          | .FunctionDef _ mName _ _ _ _ _ _ => some (PythonIdentifier.fromAst mName, s)
          | .AsyncFunctionDef _ mName _ _ _ _ _ _ => some (PythonIdentifier.fromAst mName, s)
          | _ => none
        ctx := ctx.insert classId (.class_ classId fields [] methodAsts)
      | _ => pure () -- TypedDicts, assignments, imports — not needed by callers
    modify fun s => { s with resolvedPaths := s.resolvedPaths.insert key ctx }
    pure (ctx, { stmts := #[], moduleLocals := [] })
  -- NOTE(review): a failed load returns an empty context and is not recorded in
  -- `resolvedPaths`, so each later request for the same module reads the disk again.
  | none => pure ({}, { stmts := #[], moduleLocals := [] })

/-- Resolve a dotted module name (e.g. "boto3.AccessAnalyzer") by converting dots to path
    separators and loading the final component. -/
partial def resolveModule (dottedName : String) (dir : System.FilePath) (f : SourceRange → ResolvedAnn) : ResolveM (Ctx × ResolvedPythonProgram) := do
  let components := dottedName.splitOn "."
  -- All components but the last form the directory; the last one is the module to load.
  let moduleDir := components.dropLast.foldl (· / ·) dir
  match components.getLast? with
  | some name => resolveModuleComponent name moduleDir f
  | none => pure ({}, { stmts := #[], moduleLocals := [] })

/-- The core statement resolver. Threads the context as accumulator:
    - `FunctionDef`/`AsyncFunctionDef` → reuses existing sig from ctx if already registered
      (e.g., by ClassDef's pre-scan), otherwise extracts fresh. Annotates with `.funcDecl`.
+ - `ClassDef` → pre-scans body for fields and methods, registers class in ctx with full + method list, resolves body in classCtx (self typed as class, methods visible). + - `Import`/`ImportFrom` → extends ctx with module or imported names. + - `Assign`/`AnnAssign` → extends ctx with assigned names. + - `AugAssign` → annotates with operator sig (`.funcCall`) for Translation. + - Control flow → resolves sub-blocks in current ctx (no ctx extension from if/for/while). -/ +partial def resolveStmt (ctx : Ctx) (f : SourceRange → ResolvedAnn) (s : PythonStmt) : ResolveM (Ctx × ResolvedPythonStmt) := do + match s with + | .FunctionDef a name args body decorators returns tc typeParams => + let nameId := PythonIdentifier.fromAst name + if hasOverloadDecorator decorators.val then + let sig ← extractFuncSig ctx f nameId none args decorators.val returns body.val + let overloads := match ctx[nameId]? with + | some (.overloadedFunction existing) => existing + | _ => [] + let idx := overloads.length + let ctx' := ctx.insert nameId (.overloadedFunction (overloads ++ [(idx, sig, none)])) + let (_, ann, rName, rArgs, rBody, rDecs, rRets, rTc, rTps) ← + resolveFuncDef ctx f sig a name args body decorators returns tc typeParams + return (ctx', .FunctionDef ann rName rArgs rBody rDecs rRets rTc rTps) + else + match ctx[nameId]? with + | some (.overloadedFunction _) => + -- Non-@overload def after overloads = implementation stub. Keep the overload list. + let sig ← extractFuncSig ctx f nameId none args decorators.val returns body.val + let (_, ann, rName, rArgs, rBody, rDecs, rRets, rTc, rTps) ← + resolveFuncDef ctx f sig a name args body decorators returns tc typeParams + return (ctx, .FunctionDef ann rName rArgs rBody rDecs rRets rTc rTps) + | _ => + let sig ← match ctx[nameId]? 
with + | some (.function existingSig) => pure existingSig + | _ => extractFuncSig ctx f nameId none args decorators.val returns body.val + let (ctx', ann, rName, rArgs, rBody, rDecs, rRets, rTc, rTps) ← + resolveFuncDef ctx f sig a name args body decorators returns tc typeParams + return (ctx', .FunctionDef ann rName rArgs rBody rDecs rRets rTc rTps) + | .AsyncFunctionDef a name args body decorators returns tc typeParams => + let nameId := PythonIdentifier.fromAst name + let sig ← match ctx[nameId]? with + | some (.function existingSig) => pure existingSig + | _ => extractFuncSig ctx f nameId none args decorators.val returns body.val + let (ctx', ann, rName, rArgs, rBody, rDecs, rRets, rTc, rTps) ← + resolveFuncDef ctx f sig a name args body decorators returns tc typeParams + return (ctx', .AsyncFunctionDef ann rName rArgs rBody rDecs rRets rTc rTps) + | .ClassDef a name bases keywords body decorators typeParams => + let classId := PythonIdentifier.fromAst name + let classType : PythonType := .Name SourceRange.none ⟨SourceRange.none, name.val⟩ (.Load SourceRange.none) + let fields := body.val.toList.filterMap fun s => match s with + | .AnnAssign _ (.Name _ n _) annotation _ _ => some (PythonIdentifier.fromAst n, annotation) + | _ => Option.none + let mut methods : List (PythonIdentifier × FuncSig) := [] + for s in body.val.toList do + match s with + | .FunctionDef _ mName mArgs ⟨_, mBody⟩ mDecs mReturns _ _ => + let mId := PythonIdentifier.fromAst mName + let sig ← extractFuncSig ctx f mId (some classId) mArgs mDecs.val mReturns mBody + methods := methods ++ [(mId, sig)] + | .AsyncFunctionDef _ mName mArgs ⟨_, mBody⟩ mDecs mReturns _ _ => + let mId := PythonIdentifier.fromAst mName + let sig ← extractFuncSig ctx f mId (some classId) mArgs mDecs.val mReturns mBody + methods := methods ++ [(mId, sig)] + | _ => pure () + let ctx' := ctx.insert classId (CtxEntry.class_ classId fields methods) + let classCtx := ctx'.insert (PythonIdentifier.fromAst ⟨SourceRange.none, 
"self"⟩) (CtxEntry.variable classType) + let classCtx := methods.foldl (fun c (mId, mSig) => c.insert mId (CtxEntry.function mSig)) classCtx + let methodSigs := methods.map (·.2) + let resolvedBody ← resolveBlock classCtx f body.val + let mut rBases : Array ResolvedPythonExpr := #[] + for b in bases.val do rBases := rBases.push (← resolveExpr ctx' f b) + let mut rKeywords : Array (Python.keyword ResolvedAnn) := #[] + for kw in keywords.val do rKeywords := rKeywords.push (← resolveKeyword ctx' f kw) + let mut rDecorators : Array ResolvedPythonExpr := #[] + for d in decorators.val do rDecorators := rDecorators.push (← resolveExpr ctx' f d) + let mut rTypeParams : Array (Python.type_param ResolvedAnn) := #[] + for tp in typeParams.val do rTypeParams := rTypeParams.push (← resolveTypeParam ctx' f tp) + return (ctx', .ClassDef { sr := a, info := .classDecl classId fields methodSigs } (mapAnnVal f name) + ⟨f bases.ann, rBases⟩ + ⟨f keywords.ann, rKeywords⟩ + ⟨f body.ann, resolvedBody⟩ + ⟨f decorators.ann, rDecorators⟩ + ⟨f typeParams.ann, rTypeParams⟩) + | .Import a aliases => do + let baseDir ← read + let mut ctx' := ctx + for alias in aliases.val do + match alias with + | .mk_alias _ modName asName => + let registeredId := match asName.val with + | some aliasName => PythonIdentifier.fromAst aliasName + | none => PythonIdentifier.fromImport modName + let (moduleCtx, _) ← resolveModule modName.val baseDir f + ctx' := ctx'.insert registeredId (CtxEntry.module_ moduleCtx.inner.inner) + return (ctx', .Import (f a) (mapAnnArr f (resolveAlias f) aliases)) + | .ImportFrom a modName imports level => do + let baseDir ← read + let mut ctx' := ctx + match modName.val with + | some modAnn => + let (moduleCtx, _) ← resolveModule modAnn.val baseDir f + for imp in imports.val do + match imp with + | .mk_alias _ impName asName => + let registeredId := match asName.val with + | some aliasName => PythonIdentifier.fromAst aliasName + | none => PythonIdentifier.fromAst impName + match 
ctx'[registeredId]? with + | some _ => pure () + | none => + let impId := PythonIdentifier.fromAst impName + match moduleCtx[impId]? with + | some entry => ctx' := ctx'.insert registeredId entry + | none => ctx' := ctx'.insert registeredId CtxEntry.unresolved + | none => + for imp in imports.val do + match imp with + | .mk_alias _ impName asName => + let registeredId := match asName.val with + | some aliasName => PythonIdentifier.fromAst aliasName + | none => PythonIdentifier.fromAst impName + match ctx'[registeredId]? with + | some _ => pure () + | none => ctx' := ctx'.insert registeredId CtxEntry.unresolved + return (ctx', .ImportFrom (f a) (mapAnnOpt f (mapAnnVal f) modName) (mapAnnArr f (resolveAlias f) imports) (mapAnnOpt f (resolveInt f) level)) + | .Assign a targets value tc => do + let newNames := targets.val.toList.flatMap collectNamesFromTarget + let ctx' := newNames.foldl (fun c n => c.insert n (CtxEntry.variable (annotationToPythonType Option.none))) ctx + let mut rTargets : Array ResolvedPythonExpr := #[] + for t in targets.val do rTargets := rTargets.push (← resolveExpr ctx f t) + let rValue ← resolveExpr ctx f value + return (ctx', .Assign (f a) ⟨f targets.ann, rTargets⟩ rValue (mapAnnOpt f (mapAnnVal f) tc)) + | .AnnAssign a target ann value simple => do + let newNames := collectNamesFromTarget target + let rTarget ← resolveExpr ctx f target + let rAnn ← resolveExpr ctx f ann + let rValue ← match value.val with + | some v => pure (some (← resolveExpr ctx f v)) + | none => pure none + -- Prefer the RHS call's resolved return type (e.g. boto3.S3) over the bare + -- written annotation (e.g. S3), so method calls on the variable resolve + -- through the module and demand the class. + let varTy : PythonType := match rValue with + | some (.Call { info := .funcCall sig, .. } ..) 
=> sig.returnType + | _ => ann + let ctx' := newNames.foldl (fun c n => c.insert n (CtxEntry.variable varTy)) ctx + return (ctx', .AnnAssign (f a) rTarget rAnn ⟨f value.ann, rValue⟩ (resolveInt f simple)) + | .AugAssign a target op value => do + let opSig : FuncSig := { name := .builtin (operatorToLaurel op), className := none, params := .static {required := [(.builtin "left", anyType), (.builtin "right", anyType)], optional := [], kwonly := []}, returnType := anyType, locals := [] } + let rTarget ← resolveExpr ctx f target + let rValue ← resolveExpr ctx f value + return (ctx, .AugAssign { sr := a, info := .funcCall opSig } rTarget (resolveOperator f op) rValue) + | .If a test body orelse => do + let rTest ← resolveExpr ctx f test + let rBody ← resolveBlock ctx f body.val + let rElse ← resolveBlock ctx f orelse.val + return (ctx, .If (f a) rTest ⟨f body.ann, rBody⟩ ⟨f orelse.ann, rElse⟩) + | .For a target iter body orelse tc => do + let rTarget ← resolveExpr ctx f target + let rIter ← resolveExpr ctx f iter + let rBody ← resolveBlock ctx f body.val + let rElse ← resolveBlock ctx f orelse.val + return (ctx, .For (f a) rTarget rIter ⟨f body.ann, rBody⟩ ⟨f orelse.ann, rElse⟩ (mapAnnOpt f (mapAnnVal f) tc)) + | .AsyncFor a target iter body orelse tc => do + let rTarget ← resolveExpr ctx f target + let rIter ← resolveExpr ctx f iter + let rBody ← resolveBlock ctx f body.val + let rElse ← resolveBlock ctx f orelse.val + return (ctx, .AsyncFor (f a) rTarget rIter ⟨f body.ann, rBody⟩ ⟨f orelse.ann, rElse⟩ (mapAnnOpt f (mapAnnVal f) tc)) + | .While a test body orelse => do + let rTest ← resolveExpr ctx f test + let rBody ← resolveBlock ctx f body.val + let rElse ← resolveBlock ctx f orelse.val + return (ctx, .While (f a) rTest ⟨f body.ann, rBody⟩ ⟨f orelse.ann, rElse⟩) + | .Try a body handlers orelse finalbody => do + let rBody ← resolveBlock ctx f body.val + let mut rHandlers : Array (Python.excepthandler ResolvedAnn) := #[] + for h in handlers.val do + rHandlers := 
rHandlers.push (← resolveExcepthandler ctx f h) + let rElse ← resolveBlock ctx f orelse.val + let rFinally ← resolveBlock ctx f finalbody.val + return (ctx, .Try (f a) ⟨f body.ann, rBody⟩ ⟨f handlers.ann, rHandlers⟩ ⟨f orelse.ann, rElse⟩ ⟨f finalbody.ann, rFinally⟩) + | .TryStar a body handlers orelse finalbody => do + let rBody ← resolveBlock ctx f body.val + let mut rHandlers : Array (Python.excepthandler ResolvedAnn) := #[] + for h in handlers.val do + rHandlers := rHandlers.push (← resolveExcepthandler ctx f h) + let rElse ← resolveBlock ctx f orelse.val + let rFinally ← resolveBlock ctx f finalbody.val + return (ctx, .TryStar (f a) ⟨f body.ann, rBody⟩ ⟨f handlers.ann, rHandlers⟩ ⟨f orelse.ann, rElse⟩ ⟨f finalbody.ann, rFinally⟩) + | .With a items body tc => do + let mut rItems : Array (Python.withitem ResolvedAnn) := #[] + for item in items.val do rItems := rItems.push (← resolveWithitem ctx f item) + let rBody ← resolveBlock ctx f body.val + return (ctx, .With (f a) ⟨f items.ann, rItems⟩ ⟨f body.ann, rBody⟩ (mapAnnOpt f (mapAnnVal f) tc)) + | .AsyncWith a items body tc => do + let mut rItems : Array (Python.withitem ResolvedAnn) := #[] + for item in items.val do rItems := rItems.push (← resolveWithitem ctx f item) + let rBody ← resolveBlock ctx f body.val + return (ctx, .AsyncWith (f a) ⟨f items.ann, rItems⟩ ⟨f body.ann, rBody⟩ (mapAnnOpt f (mapAnnVal f) tc)) + | .Return a value => do + let rValue ← match value.val with + | some v => pure (some (← resolveExpr ctx f v)) + | none => pure none + return (ctx, .Return (f a) ⟨f value.ann, rValue⟩) + | .Delete a targets => do + let mut rTargets : Array ResolvedPythonExpr := #[] + for t in targets.val do rTargets := rTargets.push (← resolveExpr ctx f t) + return (ctx, .Delete (f a) ⟨f targets.ann, rTargets⟩) + | .Raise a exc cause => do + let rExc ← match exc.val with + | some e => pure (some (← resolveExpr ctx f e)) + | none => pure none + let rCause ← match cause.val with + | some e => pure (some (← resolveExpr ctx 
f e)) + | none => pure none + return (ctx, .Raise (f a) ⟨f exc.ann, rExc⟩ ⟨f cause.ann, rCause⟩) + | .Assert a test msg => do + let rTest ← resolveExpr ctx f test + let rMsg ← match msg.val with + | some m => pure (some (← resolveExpr ctx f m)) + | none => pure none + return (ctx, .Assert (f a) rTest ⟨f msg.ann, rMsg⟩) + | .Expr a value => do + let rValue ← resolveExpr ctx f value + return (ctx, .Expr (f a) rValue) + | .Pass a => return (ctx, .Pass (f a)) + | .Break a => return (ctx, .Break (f a)) + | .Continue a => return (ctx, .Continue (f a)) + | .Global a names => return (ctx, .Global (f a) (mapAnnArr f (mapAnnVal f) names)) + | .Nonlocal a names => return (ctx, .Nonlocal (f a) (mapAnnArr f (mapAnnVal f) names)) + | .Match a subject cases => do + let rSubject ← resolveExpr ctx f subject + let mut resolvedCases : Array (Python.match_case ResolvedAnn) := #[] + for c in cases.val do + resolvedCases := resolvedCases.push (← resolveMatchCase ctx f c) + return (ctx, .Match (f a) rSubject ⟨f cases.ann, resolvedCases⟩) + | .TypeAlias a name typeParams value => do + let rName ← resolveExpr ctx f name + let mut rTypeParams : Array (Python.type_param ResolvedAnn) := #[] + for tp in typeParams.val do rTypeParams := rTypeParams.push (← resolveTypeParam ctx f tp) + let rValue ← resolveExpr ctx f value + return (ctx, .TypeAlias (f a) rName ⟨f typeParams.ann, rTypeParams⟩ rValue) +end + +/-- Result of resolving a program: the resolved AST plus the imported + declarations the program demanded (methods, functions, classes). -/ +structure ResolveResult where + program : ResolvedPythonProgram + /-- Resolved FunctionDef stmts for demanded imported methods + top-level functions. -/ + demandedStmts : Array ResolvedPythonStmt + /-- Demanded imported classes (id × fields) for Composite type emission. -/ + demandedClasses : List (PythonIdentifier × List (PythonIdentifier × PythonType)) + +/-- Entry point: resolves a full Python module. 
Folds `resolveStmt` over top-level + statements, threading the context. Imports are loaded on demand. -/ +def resolve (stmts : PythonProgram) (baseDir : System.FilePath := ".") : EIO String ResolveResult := do + let f : SourceRange → ResolvedAnn := fun sr => { sr, info := .irrelevant } + let moduleLocals := computeLocals stmts [] + let initCtx := moduleLocals.foldl (fun c (n, ty) => c.insert n (CtxEntry.variable ty)) builtinContext + let action : ResolveM ResolvedPythonProgram := do + let mut ctx := initCtx + let mut resolved : Array ResolvedPythonStmt := #[] + for stmt in stmts do + let (ctx', r) ← resolveStmt ctx f stmt + ctx := ctx' + resolved := resolved.push r + return { stmts := resolved, moduleLocals := moduleLocals } + let (prog, state) ← action.run baseDir |>.run {} + let demandedStmts := (state.demandedMethods.toList.map (·.2) ++ state.demandedFunctions.toList.map (·.2)).toArray + let demandedClasses := state.demandedClasses.toList.map (·.2) + return { program := prog, demandedStmts, demandedClasses } + +end -- public section +end Strata.Python.Resolution diff --git a/Strata/Languages/Python/Translation.lean b/Strata/Languages/Python/Translation.lean new file mode 100644 index 0000000000..870e83b3e4 --- /dev/null +++ b/Strata/Languages/Python/Translation.lean @@ -0,0 +1,696 @@ +/- + Copyright Strata Contributors + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ +module + +public import Strata.Languages.Laurel.Laurel +public import Strata.Languages.Python.PythonDialect +public import Strata.Languages.Python.Resolution +import Strata.DDM.Util.SourceRange + +/-! +# Pass 2: Translation + +Structural recursion over the resolved Python AST. Pattern matches on +NodeInfo and emits Laurel constructs. Never constructs Laurel.Identifier +from strings — only forwards what Resolution provided. 
+ +Input: ResolvedPythonProgram +Output: Laurel.Program +-/ + +namespace Strata.Python.Translation + +open Strata.Laurel hiding Identifier +open Strata.Python.Resolution + +public section + +-- ═══════════════════════════════════════════════════════════════════════════════ +-- Error +-- ═══════════════════════════════════════════════════════════════════════════════ + +/-- Errors that can occur during translation. -/ +inductive TransError where + /-- A Python construct with no Laurel equivalent. -/ + | unsupportedConstruct (msg : String) + /-- A bug in the translator (should never occur on well-resolved input). -/ + | internalError (msg : String) + /-- An error in the user's Python code detected during translation. -/ + | userError (range : SourceRange) (msg : String) + deriving Repr + +instance : ToString TransError where + toString + | .unsupportedConstruct msg => s!"Translation: unsupported construct: {msg}" + | .internalError msg => s!"Translation: internal error: {msg}" + | .userError _range msg => s!"User code error: {msg}" + +-- ═══════════════════════════════════════════════════════════════════════════════ +-- Monad (State for fresh counter + loop labels) +-- ═══════════════════════════════════════════════════════════════════════════════ + +/-- Mutable state threaded through translation: fresh name counter, source file path, + and a stack of loop break/continue labels for translating `break`/`continue`. -/ +structure TransState where + /-- Counter for generating unique temporary names. -/ + freshCounter : Nat := 0 + /-- Path of the source file being translated (used for metadata). -/ + filePath : System.FilePath := "" + /-- Stack of (break_label, continue_label) pairs for enclosing loops. -/ + loopLabels : List (Laurel.Identifier × Laurel.Identifier) := [] + deriving Inhabited + +abbrev BaseM := StateT TransState (Except TransError) + +/-- Writer monad for translation. Produces a value plus a list of emitted Laurel statements. 
Allows expressions that need prefix statements (e.g., `classNew` emits `New` + `__init__`)
to `tell` those statements and return just the expression value. -/
structure TransM (α : Type) where
  /-- Run the writer, producing the value and accumulated statement list. -/
  run : BaseM (α × List StmtExprMd)

/-- `pure` emits no statements; `bind` appends the statements of the continuation
    after those of the first computation, so emission order follows program order. -/
instance : Monad TransM where
  pure a := ⟨pure (a, [])⟩
  bind ma f := ⟨do
    let (a, w1) ← ma.run
    let (b, w2) ← (f a).run
    pure (b, w1 ++ w2)⟩

/-- A `BaseM` action (state access, errors) runs unchanged and emits no statements. -/
instance : MonadLift BaseM TransM where
  monadLift ma := ⟨do let a ← ma; pure (a, [])⟩

/-- Errors propagate through the underlying `BaseM`. When a handler runs, only the
    handler's statements are kept: the failed computation never produced a result. -/
instance : MonadExceptOf TransError TransM where
  throw e := ⟨throw e⟩
  tryCatch ma f := ⟨tryCatch ma.run (fun e => (f e).run)⟩

/-- Emit `stmts` into the writer output. -/
def tell (stmts : List StmtExprMd) : TransM Unit := ⟨pure ((), stmts)⟩

/-- Run `ma` and additionally return the statements it emitted. The statements are
    still emitted into the enclosing output. -/
def listen (ma : TransM α) : TransM (α × List StmtExprMd) := ⟨do
  let (a, stmts) ← ma.run
  pure ((a, stmts), stmts)⟩

/-- Run `ma`, which returns a value together with a rewriting function, and emit the
    result of applying that function to the statements `ma` produced. -/
def pass (ma : TransM (α × (List StmtExprMd → List StmtExprMd))) : TransM α := ⟨do
  let ((a, f), stmts) ← ma.run
  pure (a, f stmts)⟩

/-- Run `ma` and return its value with the statements it produced, WITHOUT emitting
    them: `ma.run` is lifted from `BaseM`, and the lift attaches an empty output.
    Used to capture the body of a nested block. -/
def collect (ma : TransM α) : TransM (α × List StmtExprMd) :=
  liftM (α := α × List StmtExprMd) ma.run

-- ═══════════════════════════════════════════════════════════════════════════════
-- Smart Constructors
-- ═══════════════════════════════════════════════════════════════════════════════

/-- Metadata array holding a single `fileRange` entry for `sr` inside `filePath`. -/
private def sourceRangeToMd (filePath : System.FilePath) (sr : SourceRange) : Imperative.MetaData Core.Expression :=
  let uri : Uri := .file filePath.toString
  #[⟨ Imperative.MetaData.fileRange, .fileRange ⟨ uri, sr ⟩ ⟩]

/-- Wrap `expr` with source metadata built from `sr` and the file path stored in the
    translation state. -/
def mkExpr (sr : SourceRange) (expr : StmtExpr) : TransM StmtExprMd := do
  pure { val := expr, md := sourceRangeToMd (← get).filePath sr }

/-- Empty metadata, for nodes that have no source location. -/
private def defaultMd : Imperative.MetaData Core.Expression := #[]
/-- Wrap `expr` with empty metadata. -/
def mkExprDefault (expr : StmtExpr) : StmtExprMd := { val := expr, md := defaultMd }
/-- Wrap `ty` with empty metadata. -/
def mkTypeDefault (ty : HighType) : HighTypeMd := { val := ty, md := defaultMd }

/-- Return the identifier `{pfx}_{n}`, where `n` is the current fresh counter, and
    increment the counter. -/
def freshId (pfx : String) : TransM Laurel.Identifier := do
  let s ← get
  set { s with freshCounter := s.freshCounter + 1 }
  pure (Laurel.Identifier.mk s!"{pfx}_{s.freshCounter}" none)

/-- Enter a loop: build the labels `{pfx}_break_{n}` and `{pfx}_continue_{n}` from the
    current fresh counter, increment the counter, push the pair on the loop-label
    stack and return it as `(break, continue)`. -/
def pushLoopLabel (pfx : String) : TransM (Laurel.Identifier × Laurel.Identifier) := do
  let s ← get
  let bk := Laurel.Identifier.mk s!"{pfx}_break_{s.freshCounter}" none
  let ct := Laurel.Identifier.mk s!"{pfx}_continue_{s.freshCounter}" none
  set { s with freshCounter := s.freshCounter + 1, loopLabels := (bk, ct) :: s.loopLabels }
  pure (bk, ct)

/-- Leave the innermost loop by dropping the top label pair. Uses the total
    `List.tail`, so an unbalanced pop on an empty stack is a no-op rather than a
    runtime panic. -/
def popLoopLabel : TransM Unit :=
  modify fun s => { s with loopLabels := s.loopLabels.tail }

/-- Break label of the innermost enclosing loop, or `none` outside any loop. -/
def currentBreakLabel : TransM (Option Laurel.Identifier) := do return (← get).loopLabels.head?.map (·.1)
/-- Continue label of the innermost enclosing loop, or `none` outside any loop. -/
def currentContinueLabel : TransM (Option Laurel.Identifier) := do return (← get).loopLabels.head?.map (·.2)

-- ═══════════════════════════════════════════════════════════════════════════════
-- PythonType → HighType
-- ═══════════════════════════════════════════════════════════════════════════════

/-- Map a resolved Python type annotation to a Laurel `HighType`.

Base names map to Core types: `int`/`bool`/`str`/`float`/`None` to their
scalars, `Any`/`object` to `Any`, and the container names `dict`/`list` to the
homogeneous Core encodings `DictStrAny`/`ListAny`. A bare name that matches none
of these is a user-defined class (`.UserDefined`), which Translation emits as a
`Composite`.

Subscripted generics carry the same meaning as their base: the parameterized
containers (`dict[...]`, `list[...]`, and the `typing` aliases `Dict`/`List`/
`Tuple`/`Set`) map to the container encodings, and the type-level operators
(`Optional`/`Union`/`Literal`/`Unpack`/`NotRequired`/`Required`/`Type`) erase to
`Any`. A subscripted name with no concrete encoding is a user-defined generic
class (`.UserDefined`).
The lowercase `dict`/`list` subscript cases must agree
with the bare-name cases — otherwise `body: dict[str, Any]` is typed `Composite`
while its dict-literal value is `DictStrAny`, and Core fails to unify the two.

The same reasoning applies to every other container spelling, so the bare-name
branch and the subscript branch recognise exactly the same container names
(`dict`/`list`/`tuple`/`set`/`frozenset` and the `typing` aliases `Dict`/`List`/
`Tuple`/`Set`/`FrozenSet`). An unparameterized `t: tuple` annotation therefore
gets the `ListAny` encoding rather than being treated as a user-defined class.
Keep the two container arms below in sync when adding a name. -/
def pythonTypeToHighType : PythonType → HighType
  | .Name _ n _ => match n.val with
    | "int" => .TInt
    | "bool" => .TBool
    | "str" => .TString
    | "float" => .TFloat64
    | "None" => .TVoid
    | "Any" | "object" => .TCore "Any"
    -- Container names: must list the same spellings as the subscript branch.
    | "dict" | "Dict" => .TCore "DictStrAny"
    | "list" | "List" | "tuple" | "Tuple" | "set" | "Set" | "frozenset" | "FrozenSet" => .TCore "ListAny"
    | name => .UserDefined { text := name, uniqueId := none }
  | .Constant _ (.ConNone _) _ => .TVoid
  -- `A | B` union syntax carries no single concrete encoding.
  | .BinOp _ _ (.BitOr _) _ => .TCore "Any"
  | .Subscript _ (.Name _ n _) _ _ => match n.val with
    -- Container names: must list the same spellings as the bare-name branch.
    | "dict" | "Dict" => .TCore "DictStrAny"
    | "list" | "List" | "tuple" | "Tuple" | "set" | "Set" | "frozenset" | "FrozenSet" => .TCore "ListAny"
    | "Optional" | "Union" | "Type"
    | "Literal" | "Unpack" | "NotRequired" | "Required" => .TCore "Any"
    | other => .UserDefined { text := other, uniqueId := none }
  | _ => .TCore "Any"

-- ═══════════════════════════════════════════════════════════════════════════════
-- Runtime Constants (extracted from runtime program interface)
-- ═══════════════════════════════════════════════════════════════════════════════

/-- Identifier with the given text and no unique id, used to name runtime procedures. -/
private def rt (name : String) : Laurel.Identifier := { text := name, uniqueId := none }

private def rtListAnyCons := rt "ListAny_cons"
private def rtListAnyNil := rt "ListAny_nil"
private def rtFromListAny := rt "from_ListAny"
private def rtDictStrAnyCons := rt "DictStrAny_cons"
private def rtDictStrAnyEmpty := rt "DictStrAny_empty"
private def rtFromDictStrAny := rt "from_DictStrAny"
private def rtFromNone := rt "from_None"
private def rtAnyGet := rt "Any_get"
private def rtAnySets := rt "Any_sets"
private def rtFromSlice := rt "from_Slice"
private def rtAnyAsInt := rt "Any..as_int!"
+private def rtOptSome := rt "OptSome" +private def rtOptNone := rt "OptNone" +private def rtPAdd := rt "PAdd" +private def rtPIn := rt "PIn" +private def rtIsError := rt "isError" +private def rtToStringAny := rt "to_string_any" +private def rtLaurelResult := rt "LaurelResult" +private def rtMaybeExcept := rt "maybe_except" + +-- ═══════════════════════════════════════════════════════════════════════════════ +-- ═══════════════════════════════════════════════════════════════════════════════ +-- The Structural Recursion +-- ═══════════════════════════════════════════════════════════════════════════════ + +mutual + +partial def translateExpr (e : Python.expr ResolvedAnn) : TransM StmtExprMd := do + let sr := e.ann.sr + match e with + | .Constant _ (.ConPos _ n) _ => mkExpr sr (.LiteralInt n.val) + | .Constant _ (.ConNeg _ n) _ => mkExpr sr (.LiteralInt (-n.val)) + | .Constant _ (.ConString _ s) _ => mkExpr sr (.LiteralString s.val) + | .Constant _ (.ConTrue _) _ => mkExpr sr (.LiteralBool true) + | .Constant _ (.ConFalse _) _ => mkExpr sr (.LiteralBool false) + | .Constant _ (.ConNone _) _ => mkExpr sr (.StaticCall rtFromNone []) + | .Constant _ (.ConFloat _ f) _ => mkExpr sr (.LiteralString f.val) + | .Constant _ _ _ => mkExpr sr .Hole + | .Name ann _ _ => match ann.info with + | .variable name => mkExpr sr (.Identifier name.toLaurel) + | .unresolved => mkExpr sr (.Hole (deterministic := false)) + | .irrelevant => mkExpr sr (.Hole (deterministic := false)) + | _ => panic! "Resolution bug: invalid NodeInfo on Name node" + | .Call ann func args kwargs => match ann.info with + | .funcCall sig => do + -- Prepend the receiver ONLY for instance methods (sig has a receiver slot). + -- A `.static` sig is a module/free function: its `.Attribute` base (e.g. the + -- module `boto3` in `boto3.client(...)`) is NOT an argument and must be dropped. 
+ let receiver ← match sig.params, func with + | .instance _ _, .Attribute _ obj _ _ => pure [← translateExpr obj] + | _, _ => pure [] + let posArgs ← args.val.toList.mapM translateExpr + let kwargPairs ← kwargs.val.toList.filterMapM fun kw => match kw with + | .mk_keyword _ kwName kwExpr => do + let val ← translateExpr kwExpr + match kwName.val with | some n => pure (some (n.val, val)) | none => pure none + mkExpr sr (.StaticCall sig.laurelName (← sig.matchArgs (receiver ++ posArgs) kwargPairs translateExpr (mkKwargs := (do return some (← mkExpr sr (.Hole (deterministic := false))))))) + | .classNew cls initSig => do + let tmp ← freshId "new" + let tmpRef ← mkExpr sr (.Identifier tmp) + let assignNew ← mkExpr sr (.Assign [tmpRef] (← mkExpr sr (.New cls.toLaurel))) + let posArgs ← args.val.toList.mapM translateExpr + let kwargPairs ← kwargs.val.toList.filterMapM fun kw => match kw with + | .mk_keyword _ kwName kwExpr => do + let val ← translateExpr kwExpr + match kwName.val with | some n => pure (some (n.val, val)) | none => pure none + let initCall ← mkExpr sr (.StaticCall initSig.laurelName (← initSig.matchArgs ([tmpRef] ++ posArgs) kwargPairs translateExpr (mkKwargs := (do return some (← mkExpr sr (.Hole (deterministic := false))))))) + tell [assignNew, initCall] + pure tmpRef + | .unresolved => mkExpr sr (.Hole (deterministic := false)) + | _ => mkExpr sr (.Hole (deterministic := false)) + | .BinOp ann left _ right => match ann.info with + | .funcCall sig => do + let l ← translateExpr left; let r ← translateExpr right + mkExpr sr (.StaticCall sig.laurelName (← sig.matchArgs [l, r] [] translateExpr)) + | _ => mkExpr sr .Hole + | .BoolOp ann _ operands => match ann.info with + | .funcCall sig => do + let exprs ← operands.val.toList.mapM translateExpr + match exprs with + | [] => mkExpr sr .Hole + | first :: rest => rest.foldlM (fun acc e => do + let args ← sig.matchArgs [acc, e] [] translateExpr + mkExpr sr (.StaticCall sig.laurelName args)) first + | _ => mkExpr 
sr .Hole + | .UnaryOp ann _ operand => match ann.info with + | .funcCall sig => do + mkExpr sr (.StaticCall sig.laurelName (← sig.matchArgs [← translateExpr operand] [] translateExpr)) + | _ => mkExpr sr .Hole + | .Compare ann left _ comparators => match ann.info with + | .funcCall sig => do + if comparators.val.size != 1 then throw (.unsupportedConstruct "Chained comparisons") + let l ← translateExpr left; let r ← translateExpr comparators.val[0]! + mkExpr sr (.StaticCall sig.laurelName (← sig.matchArgs [l, r] [] translateExpr)) + | _ => mkExpr sr .Hole + | .Attribute ann obj _ _ => match ann.info with + | .attribute name => do mkExpr sr (.FieldSelect (← translateExpr obj) name.toLaurel) + | _ => mkExpr sr .Hole + | .Subscript _ container slice _ => do + let c ← translateExpr container + let idx ← match slice with + | .Slice _ start stop _ => do + let s ← match start.val with + | some e => translateExpr e + | none => mkExpr sr (.LiteralInt 0) + let e ← match stop.val with + | some e => mkExpr sr (.StaticCall rtOptSome [← translateExpr e]) + | none => mkExpr sr (.StaticCall rtOptNone []) + mkExpr sr (.StaticCall rtFromSlice [s, e]) + | _ => translateExpr slice + mkExpr sr (.StaticCall rtAnyGet [c, idx]) + | .List _ elts _ => do + let es ← elts.val.toList.mapM translateExpr + let nil ← mkExpr sr (.StaticCall rtListAnyNil []) + es.foldrM (fun e acc => mkExpr sr (.StaticCall rtListAnyCons [e, acc])) nil + | .Tuple _ elts _ => do + let es ← elts.val.toList.mapM translateExpr + let nil ← mkExpr sr (.StaticCall rtListAnyNil []) + es.foldrM (fun e acc => mkExpr sr (.StaticCall rtListAnyCons [e, acc])) nil + | .Dict _ keys vals => do + let ks ← keys.val.toList.mapM (fun k => match k with + | .some_expr _ e => translateExpr e | .missing_expr _ => mkExpr sr .Hole) + let vs ← vals.val.toList.mapM translateExpr + let empty ← mkExpr sr (.StaticCall rtDictStrAnyEmpty []) + (List.zip ks vs).foldrM (fun (k, v) acc => + mkExpr sr (.StaticCall rtDictStrAnyCons [k, v, acc])) empty + 
| .IfExp _ test body orelse => do + mkExpr sr (.IfThenElse (← translateExpr test) (← translateExpr body) (some (← translateExpr orelse))) + | .JoinedStr _ values => do + if values.val.isEmpty then mkExpr sr (.LiteralString "") + else do + let parts ← values.val.toList.mapM translateExpr + let init ← mkExpr sr (.LiteralString "") + parts.foldlM (fun acc p => mkExpr sr (.StaticCall rtPAdd [acc, p])) init + | .FormattedValue _ value _ _ => do + mkExpr sr (.StaticCall rtToStringAny [← translateExpr value]) + | _ => mkExpr sr .Hole + +where + ann (e : Python.expr ResolvedAnn) : ResolvedAnn := match e with + | .Name a .. => a | .Constant a .. => a | .BinOp a .. => a | .Compare a .. => a + | .BoolOp a .. => a | .UnaryOp a .. => a | .Call a .. => a | .Attribute a .. => a + | .Subscript a .. => a | .List a .. => a | .Tuple a .. => a | .Dict a .. => a + | .Set a .. => a | .IfExp a .. => a | .JoinedStr a .. => a | .FormattedValue a .. => a + | .Lambda a .. => a | .ListComp a .. => a | .SetComp a .. => a | .DictComp a .. => a + | .GeneratorExp a .. => a | .NamedExpr a .. => a | .Slice a .. => a | .Starred a .. => a + | .Await a .. => a | .Yield a .. => a | .YieldFrom a .. => a | .TemplateStr a .. => a + | .Interpolation a .. 
=> a + +-- ═══════════════════════════════════════════════════════════════════════════════ +-- Statement Translation +-- ═══════════════════════════════════════════════════════════════════════════════ + +partial def translateStmtList (stmts : List (Python.stmt ResolvedAnn)) : TransM Unit := + stmts.forM translateStmt + +partial def execWriter (stmts : List (Python.stmt ResolvedAnn)) : TransM (List StmtExprMd) := do + let (_, s) ← collect (translateStmtList stmts) + pure s + +partial def translateAssign (sr : SourceRange) (target : Python.expr ResolvedAnn) + (value : Python.expr ResolvedAnn) : TransM Unit := do + match value with + | .Call ann _ args kwargs => match ann.info with + | .classNew cls initSig => do + let targetExpr ← translateExpr target + let assignNew ← mkExpr sr (.Assign [targetExpr] (← mkExpr sr (.New cls.toLaurel))) + let posArgs ← args.val.toList.mapM translateExpr + let kwargPairs ← kwargs.val.toList.filterMapM fun kw => match kw with + | .mk_keyword _ kwName kwExpr => do + let val ← translateExpr kwExpr + match kwName.val with | some n => pure (some (n.val, val)) | none => pure none + let initCall ← mkExpr sr (.StaticCall initSig.laurelName (← initSig.matchArgs ([targetExpr] ++ posArgs) kwargPairs translateExpr (mkKwargs := (do return some (← mkExpr sr (.Hole (deterministic := false))))))) + tell [assignNew, initCall] + | _ => tell [← mkExpr sr (.Assign [← translateExpr target] (← translateExpr value))] + | _ => tell [← mkExpr sr (.Assign [← translateExpr target] (← translateExpr value))] + +partial def translateStmt (s : Python.stmt ResolvedAnn) : TransM Unit := do + let sr := s.ann.sr + match s with + | .Assign _ targets value _ => do + if targets.val.size != 1 then throw (.unsupportedConstruct "Multiple assignment targets") + let target := targets.val[0]! 
+ match target with + | .Tuple _ elts _ => do + let rhsExpr ← translateExpr value + let tmp ← freshId "unpack" + let tmpDecl ← mkExpr sr (.LocalVariable tmp (mkTypeDefault (.TCore "Any")) (some rhsExpr)) + let tmpRef ← mkExpr sr (.Identifier tmp) + tell [tmpDecl] + unpackTargets sr elts.val.toList tmpRef + | .Subscript .. => do + subscriptWriteBack sr target (← translateExpr value) + | _ => translateAssign sr target value + + | .AnnAssign _ target _ value _ => do + match value.val with + | some val => translateAssign sr target val + | none => pure () + + | .AugAssign ann target _ value => match ann.info with + | .funcCall sig => do + let t ← translateExpr target; let v ← translateExpr value + let newVal ← mkExpr sr (.StaticCall sig.laurelName (← sig.matchArgs [t, v] [] translateExpr)) + match target with + | .Subscript .. => subscriptWriteBack sr target newVal + | _ => tell [← mkExpr sr (.Assign [t] newVal)] + | _ => tell [← mkExpr sr .Hole] + + | .If _ test body orelse => do + let cond ← translateExpr test + let thn ← mkExpr sr (.Block (← execWriter body.val.toList) none) + let els ← if orelse.val.isEmpty then pure none + else pure (some (← mkExpr sr (.Block (← execWriter orelse.val.toList) none))) + tell [← mkExpr sr (.IfThenElse cond thn els)] + + | .While _ test body _ => do + let (bk, ct) ← pushLoopLabel "loop" + let cond ← translateExpr test + let inner ← mkExpr sr (.Block (← execWriter body.val.toList) (some ct.text)) + let outer ← mkExpr sr (.Block [← mkExpr sr (.While cond [] none inner)] (some bk.text)) + popLoopLabel; tell [outer] + + | .For _ target iter body _ _ => do + let (bk, ct) ← pushLoopLabel "for" + let iterExpr ← translateExpr iter + let bodyStmts ← execWriter body.val.toList + let (havocStmts, assumeTarget) ← match target with + | .Tuple _ elts _ => do + let tmp ← freshId "for_iter" + let tmpRef ← mkExpr sr (.Identifier tmp) + let decl ← mkExpr sr (.LocalVariable tmp (mkTypeDefault (.TCore "Any")) none) + let havoc ← mkExpr sr (.Assign 
[tmpRef] (← mkExpr sr (.Hole (deterministic := false)))) + let (_, unpacks) ← collect (unpackTargets sr elts.val.toList tmpRef) + pure ([decl, havoc] ++ unpacks, tmpRef) + | _ => do + let tgt ← translateExpr target + let havoc ← mkExpr sr (.Assign [tgt] (← mkExpr sr (.Hole (deterministic := false)))) + pure ([havoc], tgt) + let assume ← mkExpr sr (.Assume (← mkExpr sr (.StaticCall rtPIn [assumeTarget, iterExpr]))) + let inner ← mkExpr sr (.Block (havocStmts ++ [assume] ++ bodyStmts) (some ct.text)) + let outer ← mkExpr sr (.Block [inner] (some bk.text)) + popLoopLabel; tell [outer] + + | .Return _ value => do + match value.val with + | some expr => do + let e ← translateExpr expr + tell [← mkExpr sr (.Assign [← mkExpr sr (.Identifier rtLaurelResult)] e), ← mkExpr sr (.Exit "$body")] + | none => tell [← mkExpr sr (.Exit "$body")] + + | .Assert _ test _ => tell [← mkExpr sr (.Assert (← translateExpr test))] + | .Expr _ (.Constant _ (.ConString _ _) _) => pure () + | .Expr _ value => tell [← translateExpr value] + | .Pass _ => pure () + | .Break _ => tell [← mkExpr sr (.Exit ((← currentBreakLabel).map (·.text) |>.getD "break"))] + | .Continue _ => tell [← mkExpr sr (.Exit ((← currentContinueLabel).map (·.text) |>.getD "continue"))] + + | .Try _ body handlers _ _ => translateTryExcept sr body handlers + | .TryStar _ body handlers _ _ => translateTryExcept sr body handlers + + | .With _ items body _ => do + let (pre, post) ← items.val.toList.foldlM (fun acc item => do + let (pre, post) := acc + match item with + | .mk_withitem ann ctxExpr optVars => do + let mgr ← translateExpr ctxExpr + match ann.info with + | .withCtx enterSig exitSig => + let enterCall ← mkExpr sr (.StaticCall enterSig.laurelName [mgr]) + let exitCall ← mkExpr sr (.StaticCall exitSig.laurelName [mgr]) + match optVars.val with + | some varExpr => + pure (pre ++ [← mkExpr sr (.Assign [← translateExpr varExpr] enterCall)], post ++ [exitCall]) + | none => pure (pre ++ [enterCall], post ++ [exitCall]) + | 
_ => + let enter ← mkExpr sr (.Hole (deterministic := false)) + let exit ← mkExpr sr (.Hole (deterministic := false)) + match optVars.val with + | some varExpr => + pure (pre ++ [← mkExpr sr (.Assign [← translateExpr varExpr] enter)], post ++ [exit]) + | none => pure (pre ++ [enter], post ++ [exit]) + ) (([] : List StmtExprMd), ([] : List StmtExprMd)) + let bodyStmts ← execWriter body.val.toList + tell (pre ++ bodyStmts ++ post) + + | .Raise _ exc _ => do + match exc.val with + | some excExpr => do + let errorExpr ← translateExpr excExpr + tell [← mkExpr sr (.Assign [← mkExpr sr (.Identifier rtMaybeExcept)] errorExpr)] + | none => tell [← mkExpr sr (.Assign [← mkExpr sr (.Identifier rtMaybeExcept)] (← mkExpr sr .Hole))] + + | .Import _ _ => pure () + | .ImportFrom _ _ _ _ => pure () + | .Global _ _ => pure () + | .Nonlocal _ _ => pure () + | .Delete _ _ => pure () + | .AsyncFor _ _ _ _ _ _ => tell [← mkExpr sr .Hole] + | .AsyncWith _ _ _ _ => tell [← mkExpr sr .Hole] + | .Match _ _ _ => tell [← mkExpr sr .Hole] + | .TypeAlias _ _ _ _ => pure () + | .FunctionDef _ _ _ _ _ _ _ _ => pure () + | .AsyncFunctionDef _ _ _ _ _ _ _ _ => pure () + | .ClassDef _ _ _ _ _ _ _ => pure () + +where + ann (s : Python.stmt ResolvedAnn) : ResolvedAnn := match s with + | .FunctionDef a .. => a | .AsyncFunctionDef a .. => a | .ClassDef a .. => a + | .Return a .. => a | .Delete a .. => a | .Assign a .. => a | .AugAssign a .. => a + | .AnnAssign a .. => a | .For a .. => a | .AsyncFor a .. => a | .While a .. => a + | .If a .. => a | .With a .. => a | .AsyncWith a .. => a | .Raise a .. => a + | .Try a .. => a | .TryStar a .. => a | .Assert a .. => a | .Import a .. => a + | .ImportFrom a .. => a | .Global a .. => a | .Nonlocal a .. => a | .Expr a .. => a + | .Pass a => { sr := a.sr, info := .irrelevant } | .Break a => { sr := a.sr, info := .irrelevant } + | .Continue a => { sr := a.sr, info := .irrelevant } | .Match a .. => a | .TypeAlias a .. 
=> a + +partial def unpackTargets (sr : SourceRange) (elts : List (Python.expr ResolvedAnn)) + (sourceRef : StmtExprMd) : TransM Unit := do + for (elt, idx) in elts.zipIdx do + let getExpr ← mkExpr sr (.StaticCall rtAnyGet [sourceRef, ← mkExpr sr (.LiteralInt ↑idx)]) + match elt with + | .Tuple _ innerElts _ => do + let innerTmp ← freshId "unpack" + let innerRef ← mkExpr sr (.Identifier innerTmp) + let innerDecl ← mkExpr sr (.LocalVariable innerTmp (mkTypeDefault (.TCore "Any")) (some getExpr)) + tell [innerDecl] + unpackTargets sr innerElts.val.toList innerRef + | _ => do + let tgt ← translateExpr elt + tell [← mkExpr sr (.Assign [tgt] getExpr)] + +partial def collectSubscriptChain (expr : Python.expr ResolvedAnn) : TransM (Python.expr ResolvedAnn × List (Python.expr ResolvedAnn)) := do + match expr with + | .Subscript _ container slice _ => + let (root, innerIndices) ← collectSubscriptChain container + pure (root, innerIndices ++ [slice]) + | other => pure (other, []) + +/-- Write `rhs` back into the subscript target `a[i]...[j]` via `Any_sets`, then + assign the updated container to its root. Used by both plain and augmented + subscript assignment — a subscript is not an lvalue identifier. 
-/ +partial def subscriptWriteBack (sr : SourceRange) (target : Python.expr ResolvedAnn) + (rhs : StmtExprMd) : TransM Unit := do + let (root, indices) ← collectSubscriptChain target + let rootExpr ← translateExpr root + let idxList ← indices.foldrM (fun idx acc => do + let idxExpr ← match idx with + | .Slice _ start stop _ => do + let s' ← match start.val with + | some e => mkExpr sr (.StaticCall rtAnyAsInt [← translateExpr e]) + | none => mkExpr sr (.LiteralInt 0) + let e' ← match stop.val with + | some e => mkExpr sr (.StaticCall rtOptSome [← mkExpr sr (.StaticCall rtAnyAsInt [← translateExpr e])]) + | none => mkExpr sr (.StaticCall rtOptNone []) + mkExpr sr (.StaticCall rtFromSlice [s', e']) + | _ => translateExpr idx + mkExpr sr (.StaticCall rtListAnyCons [idxExpr, acc]) + ) (← mkExpr sr (.StaticCall rtListAnyNil [])) + let setsCall ← mkExpr sr (.StaticCall rtAnySets [idxList, rootExpr, rhs]) + tell [← mkExpr sr (.Assign [rootExpr] setsCall)] + +partial def translateTryExcept (sr : SourceRange) + (body : Ann (Array (Python.stmt ResolvedAnn)) ResolvedAnn) + (handlers : Ann (Array (Python.excepthandler ResolvedAnn)) ResolvedAnn) : TransM Unit := do + let tryLabel := s!"try_end_{sr.start.byteIdx}" + let catchersLabel := s!"exception_handlers_{sr.start.byteIdx}" + let bodyStmts ← execWriter body.val.toList + let withChecks ← bodyStmts.foldlM (fun acc stmt => do + let ref ← mkExpr sr (.Identifier rtMaybeExcept) + let check ← mkExpr sr (.StaticCall rtIsError [ref]) + let ifCheck ← mkExpr sr (.IfThenElse check (← mkExpr sr (.Exit catchersLabel)) none) + pure (acc ++ [stmt, ifCheck]) + ) ([] : List StmtExprMd) + let exitTry ← mkExpr sr (.Exit tryLabel) + let catchers ← mkExpr sr (.Block (withChecks ++ [exitTry]) (some catchersLabel)) + let handlerLists ← handlers.val.toList.mapM fun handler => match handler with + | .ExceptHandler _ _ _ handlerBody => execWriter handlerBody.val.toList + let handlerStmts := handlerLists.flatten + tell [← mkExpr sr (.Block ([catchers] 
++ handlerStmts) (some tryLabel))] + +-- ═══════════════════════════════════════════════════════════════════════════════ +-- Function / Class / Module — reads NodeInfo directly +-- ═══════════════════════════════════════════════════════════════════════════════ + +/-- Rewrite identifiers in a precondition expression: each declared parameter name + `x` becomes the input name `$in_x`. Laurel `requires` clauses are evaluated in + the procedure's INPUT scope (where params are named `$in_x`), not the body scope + (where they are copied to locals `x`). -/ +partial def renameParamsToInputs (paramNames : List String) (e : StmtExprMd) : StmtExprMd := + let rw := renameParamsToInputs paramNames + let rwOpt := fun (o : Option StmtExprMd) => o.map rw + let rwList := fun (l : List StmtExprMd) => l.map rw + let val := match e.val with + | .Identifier name => + if paramNames.contains name.text then .Identifier { name with text := s!"$in_{name.text}" } else e.val + | .IfThenElse c t el => .IfThenElse (rw c) (rw t) (rwOpt el) + | .Block ss l => .Block (rwList ss) l + | .Assign ts v => .Assign (rwList ts) (rw v) + | .FieldSelect t fn => .FieldSelect (rw t) fn + | .PureFieldUpdate t fn nv => .PureFieldUpdate (rw t) fn (rw nv) + | .StaticCall c args => .StaticCall c (rwList args) + | .PrimitiveOp op args => .PrimitiveOp op (rwList args) + | .ReferenceEquals l r => .ReferenceEquals (rw l) (rw r) + | .AsType t ty => .AsType (rw t) ty + | .IsType t ty => .IsType (rw t) ty + | .InstanceCall t c args => .InstanceCall (rw t) c (rwList args) + | .Old v => .Old (rw v) + | .Fresh v => .Fresh (rw v) + | .Assert c => .Assert (rw c) + | .Assume c => .Assume (rw c) + | .Return v => .Return (rwOpt v) + | other => other + { e with val } + +partial def translateFunction (sig : FuncSig) (body : Array (Python.stmt ResolvedAnn)) + (sr : SourceRange) : TransM Procedure := do + let declInputs := sig.laurelDeclInputs + let inputs : List Laurel.Parameter := declInputs.map fun (lId, pTy) => + { name := { text 
:= s!"$in_{lId.text}", uniqueId := none }, type := mkTypeDefault (pythonTypeToHighType pTy) } + let outputs : List Laurel.Parameter := + [{ name := rtLaurelResult, type := mkTypeDefault (pythonTypeToHighType sig.returnType) }, + { name := rtMaybeExcept, type := mkTypeDefault (.TCore "Error") }] + let paramCopies := declInputs.map fun (lId, pTy) => + mkExprDefault (.LocalVariable lId (mkTypeDefault (pythonTypeToHighType pTy)) + (some (mkExprDefault (.Identifier { text := s!"$in_{lId.text}", uniqueId := none })))) + let localDecls := sig.laurelLocals.map fun (lId, lTy) => + mkExprDefault (.LocalVariable lId (mkTypeDefault (pythonTypeToHighType lTy)) none) + let (preAsserts, restBody) := body.toList.span fun s => match s with + | .Assert _ _ _ => true | _ => false + let paramNames := declInputs.map (·.1.text) + let preconditions ← preAsserts.mapM fun s => match s with + | .Assert _ test _ => do pure (renameParamsToInputs paramNames (← translateExpr test)) + | _ => unreachable! + let bodyStmts ← execWriter restBody + let bodyBlock ← mkExpr sr (.Block (paramCopies ++ localDecls ++ bodyStmts) none) + let md := sourceRangeToMd (← get).filePath sr + pure { + name := sig.laurelName + inputs := inputs + outputs := outputs + preconditions := preconditions + determinism := .deterministic none + decreases := none + isFunctional := false + body := .Transparent bodyBlock + md := md + } + +partial def translateClass (name : PythonIdentifier) (attributes : List (PythonIdentifier × PythonType)) + (_methods : List FuncSig) (body : Array (Python.stmt ResolvedAnn)) + : TransM (TypeDefinition × List Procedure) := do + let laurelFields := attributes.map fun (fId, fTy) => + ({ name := fId.toLaurel, isMutable := true, type := mkTypeDefault (pythonTypeToHighType fTy) } : Field) + let procResults ← body.toList.mapM fun stmt => match stmt with + | .FunctionDef ann _ _ fbody _ _ _ _ => match ann.info with + | .funcDecl sig => do pure (some (← translateFunction sig fbody.val ann.sr)) + | _ => 
pure none + | .AsyncFunctionDef ann _ _ fbody _ _ _ _ => match ann.info with + | .funcDecl sig => do pure (some (← translateFunction sig fbody.val ann.sr)) + | _ => pure none + | _ => pure none + let procs := procResults.filterMap id + let ct : CompositeType := { name := name.toLaurel, extending := [], fields := laurelFields, instanceProcedures := [] } + pure (.Composite ct, procs) + +partial def translateModule (program : ResolvedPythonProgram) : TransM Laurel.Program := do + let init : List Procedure × List TypeDefinition × List (Python.stmt ResolvedAnn) := ([], [], []) + let (procedures, types, otherStmts) ← program.stmts.toList.foldlM (fun (procs, tys, others) stmt => do + match stmt with + | .FunctionDef ann _ _ body _ _ _ _ => match ann.info with + | .funcDecl sig => + let proc ← translateFunction sig body.val ann.sr + pure (procs ++ [proc], tys, others) + | _ => pure (procs, tys, others) + | .AsyncFunctionDef ann _ _ body _ _ _ _ => match ann.info with + | .funcDecl sig => + let proc ← translateFunction sig body.val ann.sr + pure (procs ++ [proc], tys, others) + | _ => pure (procs, tys, others) + | .ClassDef ann _ _ _ body _ _ => match ann.info with + | .classDecl name fields methods => + let (td, ms) ← translateClass name fields methods body.val + pure (procs ++ ms, tys ++ [td], others) + | _ => pure (procs, tys, others) + | other => pure (procs, tys, others ++ [other]) + ) init + let procedures ← if otherStmts.isEmpty then pure procedures + else do + let sr : SourceRange := default + let nameId := rt "__name__" + let nameDecl ← mkExpr sr (.LocalVariable nameId (mkTypeDefault .TString) (some (mkExprDefault (.LiteralString "__main__")))) + let localDecls := program.moduleLocals.map fun (lId, lTy) => + mkExprDefault (.LocalVariable lId.toLaurel (mkTypeDefault (pythonTypeToHighType lTy)) none) + let bodyStmts ← execWriter otherStmts + let bodyBlock ← mkExpr sr (.Block ([nameDecl] ++ localDecls ++ bodyStmts) none) + let mainOutputs : List Laurel.Parameter := + 
[{ name := rtLaurelResult, type := mkTypeDefault (.TCore "Any") }, + { name := rtMaybeExcept, type := mkTypeDefault (.TCore "Error") }] + let mainMd := sourceRangeToMd (← get).filePath sr + let mainProc : Procedure := { name := rt "__main__", inputs := [], outputs := mainOutputs, preconditions := [], determinism := .deterministic none, decreases := none, isFunctional := false, body := .Transparent bodyBlock, md := mainMd } + pure (procedures ++ [mainProc]) + return { staticProcedures := procedures, staticFields := [], types, constants := [] } + +end -- mutual + +-- ═══════════════════════════════════════════════════════════════════════════════ +-- Runner +-- ═══════════════════════════════════════════════════════════════════════════════ + +/-- Entry point: translates a resolved Python program to a Laurel program. + Returns the Laurel program and final translator state, or a `TransError`. -/ +def runTranslation (program : ResolvedPythonProgram) + (filePath : String := "") + : Except TransError (Laurel.Program × TransState) := + (translateModule program).run.run { filePath := filePath } |>.map fun ((prog, _stmts), state) => (prog, state) + +end -- public section +end Strata.Python.Translation diff --git a/StrataMain.lean b/StrataMain.lean index 115656dea9..9eb82c63cd 100644 --- a/StrataMain.lean +++ b/StrataMain.lean @@ -691,6 +691,156 @@ def pyAnalyzeLaurelCommand : Command where Core.Sarif.writeSarifOutput checkMode files vcResults (filePath ++ ".sarif") printPyAnalyzeSummary vcResults checkMode +def pyAnalyzeV2Command : Command where + name := "pyAnalyzeV2" + args := [ "file" ] + flags := [{ name := "verbose", help := "Enable verbose output." }, + { name := "no-solve", help := "Generate SMT-Lib files but do not invoke the solver." }, + { name := "profile", help := "Print elapsed time for each pipeline step." }, + { name := "quiet", help := "Suppress warnings on stderr." 
}, + checkModeFlag, checkLevelFlag, + { name := "sarif", help := "Write results as SARIF to .sarif." }, + { name := "vc-directory", + help := "Store VCs in SMT-Lib format in .", + takesArg := .arg "dir" }, + { name := "unique-bound-names", help := "Use globally unique names for quantifier-bound variables." }, + { name := "keep-all-files", + help := "Store intermediate Laurel and Core programs in .", + takesArg := .arg "dir" }] + help := "Verify a Python Ion program via the V2 pipeline (Resolution → Translation → Elaboration → Core)." + callback := fun v pflags => do + let verbose := pflags.getBool "verbose" + let profile := pflags.getBool "profile" + let quiet := pflags.getBool "quiet" + let outputSarif := pflags.getBool "sarif" + let filePath := v[0] + let pySourceOpt ← tryReadPythonSource filePath + let keepDir := pflags.getString "keep-all-files" + let baseName := deriveBaseName filePath + if let some dir := keepDir then + IO.FS.createDirAll dir + + let sourcePath := pySourceOpt.map (·.1) + -- Build FileMap for source position resolution. + let mfm : Option (String × Lean.FileMap) := match pySourceOpt with + | some (pyPath, srcText) => some (pyPath, .ofString srcText) + | none => none + let combinedLaurel ← + match ← Strata.pyAnalyzeLaurelV2 filePath sourcePath + (profile := profile) (quiet := quiet) |>.toBaseIO with + | .ok r => pure r + | .error (.userCode range msg) => + let location := if range.isNone then "" else + match mfm with + | some (_, fm) => + let pos := fm.toPosition range.start + s!" 
at line {pos.line}, col {pos.column}" + | none => "" + exitPyAnalyzeUserError s!"{msg}{location}" + | .error (.knownLimitation msg) => + exitPyAnalyzeKnownLimitation msg + | .error (.internal msg) => + exitPyAnalyzeInternalError msg + + if verbose then + IO.println "\n==== Laurel Program ====" + IO.println f!"{combinedLaurel}" + + if let some dir := keepDir then + let path := s!"{dir}/{baseName}.laurel" + IO.FS.writeFile path (toString (Std.Format.pretty f!"{combinedLaurel}") ++ "\n") + + -- Old lowering passes are subsumed by Elaboration (already run in pyAnalyzeLaurelV2). + let (coreProgramOption, laurelTranslateErrors, loweredLaurel) ← + profileStep profile "Laurel to Core translation" do + pure (Strata.translateCombinedLaurelMinimal combinedLaurel) + + if let some dir := keepDir then + let path := s!"{dir}/{baseName}.lowered.laurel" + IO.FS.writeFile path (toString (Std.Format.pretty f!"{loweredLaurel}") ++ "\n") + + let coreProgram ← + match coreProgramOption with + | none => + exitPyAnalyzeInternalError s!"Laurel to Core translation failed: {laurelTranslateErrors}" + | some core => pure core + + if verbose then + IO.println "\n==== Core Program ====" + IO.print coreProgram + + -- Split prelude / user procedure names. + let userSourcePath := sourcePath.getD filePath + let (preludeNames, userProcNames) := + Strata.splitProcNames coreProgram [userSourcePath] + + if let some dir := keepDir then + let path := s!"{dir}/{baseName}.core" + IO.FS.writeFile path (toString coreProgram) + + -- Verify using Core verifier + let checkMode ← parseCheckMode pflags + let checkLevel ← parseCheckLevel pflags + let noSolve := pflags.getBool "no-solve" + if noSolve && (pflags.getString "vc-directory").isNone && keepDir.isNone then + exitCmdFailure "pyAnalyzeV2" + "--no-solve requires --vc-directory or \ + --keep-all-files to specify where SMT \ + files are stored." 
+ let uniqueBoundNames := pflags.getBool "unique-bound-names" + let baseOptions : VerifyOptions := + { VerifyOptions.default with + stopOnFirstError := false, verbose := .quiet, solver := Core.defaultSolver, + removeIrrelevantAxioms := .Precise, + checkMode := checkMode, checkLevel := checkLevel, + skipSolver := noSolve, + alwaysGenerateSMT := noSolve, + uniqueBoundNames := uniqueBoundNames, + profile := profile } + let options : VerifyOptions := match pflags.getString "vc-directory" with + | .some dir => { baseOptions with vcDirectory := some (dir : System.FilePath) } + | .none => match keepDir with + | some dir => { baseOptions with vcDirectory := some (s!"{dir}/{baseName}" : System.FilePath) } + | none => baseOptions + let vcResults ← profileStep profile "SMT verification" do + match ← Core.verifyProgram coreProgram options + (moreFns := Strata.Python.ReFactory) + (proceduresToVerify := some userProcNames) + (externalPhases := [Strata.frontEndPhase]) |>.toBaseIO with + | .ok r => pure r + | .error msg => exitPyAnalyzeInternalError msg + + -- Print translation errors (always on stderr) + if !laurelTranslateErrors.isEmpty then + IO.eprintln "\n==== Errors ====" + for err in laurelTranslateErrors do + IO.eprintln err + + -- Print per-VC results by default, unless SARIF mode is used + if !outputSarif then + let mut s := "" + for vcResult in vcResults do + let fileMap := mfm.map (·.2) + let location := match Imperative.getFileRange vcResult.obligation.metadata with + | some fr => + if fr.range.isNone then "" + else s!"{fr.format fileMap (includeEnd? := false)}" + | none => "" + let messageSuffix := match vcResult.obligation.metadata.getPropertySummary with + | some msg => s!" - {msg}" + | none => s!" 
- {vcResult.obligation.label}" + let outcomeStr := vcResult.formatOutcome + let loc := if !location.isEmpty then s!"{location}: " else "unknown location: " + s := s ++ s!"{loc}{outcomeStr}{messageSuffix}\n" + IO.print s + -- Output in SARIF format if requested + if outputSarif then + let files := match mfm with + | some (pyPath, fm) => Map.empty.insert (Strata.Uri.file pyPath) fm + | none => Map.empty + Core.Sarif.writeSarifOutput checkMode files vcResults (filePath ++ ".sarif") + printPyAnalyzeSummary vcResults checkMode + def pyAnalyzeToGotoCommand : Command where name := "pyAnalyzeToGoto" args := [ "file" ] @@ -1212,6 +1362,7 @@ def commandGroups : List CommandGroup := [ commands := [javaGenCommand] }, { name := "Python" commands := [pyAnalyzeCommand, pyAnalyzeLaurelCommand, + pyAnalyzeV2Command, pyResolveOverloadsCommand, pySpecsCommand, pySpecToLaurelCommand, pyAnalyzeLaurelToGotoCommand, diff --git a/StrataTest/Languages/Python/datetime_stub.py b/StrataTest/Languages/Python/datetime_stub.py new file mode 100644 index 0000000000..ebd16283e0 --- /dev/null +++ b/StrataTest/Languages/Python/datetime_stub.py @@ -0,0 +1,18 @@ +"""Datetime module stub for Resolution.""" + +class datetime: + @staticmethod + def now() -> 'datetime': + pass + + @staticmethod + def strptime(date_string: str, format: str) -> 'datetime': + pass + +class date: + @staticmethod + def today() -> 'date': + pass + +def timedelta(days: int = 0, hours: int = 0) -> int: + pass diff --git a/StrataTest/Languages/Python/diff_test.sh b/StrataTest/Languages/Python/diff_test.sh new file mode 100755 index 0000000000..a3cd5cc276 --- /dev/null +++ b/StrataTest/Languages/Python/diff_test.sh @@ -0,0 +1,652 @@ +#!/bin/bash +# ============================================================================= +# Differential Testing Infrastructure for Python -> Laurel Refactor +# ============================================================================= +# +# Usage: +# ./diff_test.sh baseline Capture old pipeline 
outputs +# ./diff_test.sh compare [command] Compare new pipeline against baseline +# ./diff_test.sh single Run both pipelines on one test +# ./diff_test.sh summary Show stored results summary +# +# The baseline command stores results from pyAnalyzeLaurel. +# The compare command runs pyAnalyzeLaurelRefactored (or specified command) +# and diffs against stored baseline. +# +# Exit codes: +# 0 - No regressions +# 1 - Regressions found (or usage error) +# ============================================================================= + +set -o pipefail + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/../../.." && pwd)" +STRATA_BIN="$ROOT_DIR/.lake/build/bin/strata" +TEST_DIR="$SCRIPT_DIR/tests" +BASELINE_DIR="$SCRIPT_DIR/baseline" +RESULTS_DIR="$SCRIPT_DIR/results" + +# Pipeline commands +OLD_PIPELINE="pyAnalyzeLaurel" +NEW_PIPELINE="pyAnalyzeV2" + +# Timeout per test (seconds) +TIMEOUT=10 + +# Parallelism +PARALLEL_JOBS=4 + +# Add cvc5 to PATH +export PATH="/Users/somayyas/bin:$PATH" + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +# Colors for terminal output (disabled if not a tty) +if [ -t 1 ]; then + RED='\033[0;31m' + GREEN='\033[0;32m' + YELLOW='\033[0;33m' + BLUE='\033[0;34m' + BOLD='\033[1m' + RESET='\033[0m' +else + RED='' GREEN='' YELLOW='' BLUE='' BOLD='' RESET='' +fi + +die() { + echo "ERROR: $*" >&2 + exit 1 +} + +usage() { + echo "Usage: $0 [args...]" + echo "" + echo "Commands:" + echo " baseline Capture old pipeline (pyAnalyzeLaurel) results" + echo " compare [cmd] Compare new pipeline against baseline" + echo " Default cmd: pyAnalyzeLaurelRefactored" + echo " single Run both pipelines on a single test" + echo " 
summary Show summary of last comparison results" + echo " list List all available test files" + echo "" + echo "Examples:" + echo " $0 baseline" + echo " $0 compare" + echo " $0 compare pyAnalyzeLaurelRefactored" + echo " $0 single test_arithmetic" + echo " $0 summary" + exit 1 +} + +# Extract test name from file path: test_foo.python.st.ion -> test_foo +testname_from_file() { + local f="$1" + basename "$f" .python.st.ion +} + +# Regenerate all Ion files from Python sources unconditionally +regen_ion_files() { + for pyfile in $(find "$TEST_DIR" -name '*.py' -type f); do + local ionfile="${pyfile%.py}.python.st.ion" + python3 -m strata.gen py_to_strata "$pyfile" "$ionfile" 2>/dev/null || true + done +} + +# Get all test files +get_test_files() { + regen_ion_files + find "$TEST_DIR" -name '*.python.st.ion' -type f | sort +} + +# Run a pipeline command on a test file with timeout. +# Captures stdout, stderr, and exit code. +# Arguments: +# Returns: exit code of the pipeline +run_pipeline() { + local cmd="$1" + local test_file="$2" + local stdout_file="$3" + local stderr_file="$4" + + # Run from the repo root so relative paths in strata work + (cd "$ROOT_DIR" && \ + timeout "$TIMEOUT" "$STRATA_BIN" "$cmd" "$test_file" \ + >"$stdout_file" 2>"$stderr_file" + ) + return $? +} + +# Classify a pipeline result based on exit code and output. 
+# Prints one of: pass, fail, error, timeout, crash +classify_result() { + local exit_code="$1" + local stdout_file="$2" + + if [ "$exit_code" -eq 124 ]; then + echo "timeout" + return + fi + + # Check for RESULT line in output (structured output from pyAnalyzeLaurel) + local result_line + result_line=$(grep '^RESULT:' "$stdout_file" 2>/dev/null | tail -1) + + if [ -n "$result_line" ]; then + case "$result_line" in + *"Analysis success"*) echo "pass" ;; + *"Inconclusive"*) echo "inconclusive" ;; + *"Failures found"*) echo "fail" ;; + *"User error"*) echo "user_error" ;; + *"Known limitation"*) echo "known_limitation" ;; + *"Internal error"*) echo "internal_error" ;; + *) echo "unknown" ;; + esac + elif [ "$exit_code" -eq 0 ]; then + echo "pass" + elif [ "$exit_code" -eq 1 ]; then + echo "user_error" + elif [ "$exit_code" -eq 2 ]; then + echo "fail" + elif [ "$exit_code" -eq 3 ]; then + echo "internal_error" + elif [ "$exit_code" -eq 4 ]; then + echo "known_limitation" + else + echo "crash" + fi +} + +# --------------------------------------------------------------------------- +# Phase 1: Capture Baseline +# --------------------------------------------------------------------------- + +cmd_baseline() { + echo -e "${BOLD}=== Capturing Baseline (${OLD_PIPELINE}) ===${RESET}" + echo "" + + # Verify strata binary exists + [ -x "$STRATA_BIN" ] || die "Strata binary not found at: $STRATA_BIN" + + # Create baseline directory + mkdir -p "$BASELINE_DIR" + + local total=0 + local succeeded=0 + local failed=0 + + local test_files + test_files=$(get_test_files) + local file_count + file_count=$(echo "$test_files" | wc -l | tr -d ' ') + + echo "Running $OLD_PIPELINE on $file_count test files..." 
+ echo "" + + for test_file in $test_files; do + local name + name=$(testname_from_file "$test_file") + total=$((total + 1)) + + local rel_path + rel_path="${test_file#$ROOT_DIR/}" + + local stdout_file="$BASELINE_DIR/${name}.stdout" + local stderr_file="$BASELINE_DIR/${name}.stderr" + local meta_file="$BASELINE_DIR/${name}.meta" + + run_pipeline "$OLD_PIPELINE" "$rel_path" "$stdout_file" "$stderr_file" + local exit_code=$? + + local category + category=$(classify_result "$exit_code" "$stdout_file") + + # Write metadata + echo "exit_code=$exit_code" > "$meta_file" + echo "category=$category" >> "$meta_file" + echo "command=$OLD_PIPELINE" >> "$meta_file" + echo "file=$rel_path" >> "$meta_file" + echo "timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$meta_file" + + # Display progress + case "$category" in + pass) + echo -e " ${GREEN}[PASS]${RESET} $name" + succeeded=$((succeeded + 1)) + ;; + fail) + echo -e " ${YELLOW}[FAIL]${RESET} $name (verification failures found)" + succeeded=$((succeeded + 1)) # Still a valid run + ;; + inconclusive) + echo -e " ${YELLOW}[INCO]${RESET} $name" + succeeded=$((succeeded + 1)) + ;; + timeout) + echo -e " ${RED}[TIME]${RESET} $name" + failed=$((failed + 1)) + ;; + *) + echo -e " ${RED}[ERR ]${RESET} $name ($category)" + failed=$((failed + 1)) + ;; + esac + done + + echo "" + echo -e "${BOLD}Baseline captured:${RESET} $total tests" + echo " Ran successfully: $succeeded" + echo " Errored/Timeout: $failed" + echo " Stored in: $BASELINE_DIR/" + echo "" +} + +# --------------------------------------------------------------------------- +# Phase 2: Differential Comparison +# --------------------------------------------------------------------------- + +cmd_compare() { + local pipeline="${1:-$NEW_PIPELINE}" + + echo -e "${BOLD}=== Differential Comparison ===${RESET}" + echo " Baseline: $OLD_PIPELINE (stored in baseline/)" + echo " Current: $pipeline" + echo "" + + # Verify prerequisites + [ -x "$STRATA_BIN" ] || die "Strata binary not found 
at: $STRATA_BIN" + [ -d "$BASELINE_DIR" ] || die "No baseline found. Run '$0 baseline' first." + + # Create results directory + mkdir -p "$RESULTS_DIR" + + local total=0 + local same=0 + local improved=0 + local regression=0 + local different=0 + + # Track lists for summary + local regression_list="" + local improved_list="" + local different_list="" + + local test_files + test_files=$(get_test_files) + + for test_file in $test_files; do + local name + name=$(testname_from_file "$test_file") + total=$((total + 1)) + + local rel_path + rel_path="${test_file#$ROOT_DIR/}" + + # Check baseline exists + local baseline_meta="$BASELINE_DIR/${name}.meta" + if [ ! -f "$baseline_meta" ]; then + echo -e " ${YELLOW}[SKIP]${RESET} $name (no baseline)" + continue + fi + + # Read baseline category + local baseline_category + baseline_category=$(grep '^category=' "$baseline_meta" | cut -d= -f2) + + # Run new pipeline + local new_stdout="$RESULTS_DIR/${name}.stdout" + local new_stderr="$RESULTS_DIR/${name}.stderr" + + run_pipeline "$pipeline" "$rel_path" "$new_stdout" "$new_stderr" + local new_exit_code=$? 
+ + local new_category + new_category=$(classify_result "$new_exit_code" "$new_stdout") + + # Write result metadata + local result_meta="$RESULTS_DIR/${name}.meta" + echo "exit_code=$new_exit_code" > "$result_meta" + echo "category=$new_category" >> "$result_meta" + echo "baseline_category=$baseline_category" >> "$result_meta" + echo "command=$pipeline" >> "$result_meta" + echo "file=$rel_path" >> "$result_meta" + echo "timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$result_meta" + + # Classify the comparison + local baseline_ok=false + local new_ok=false + + # "ok" means the pipeline produced a meaningful result (pass, fail, or inconclusive) + case "$baseline_category" in + pass|fail|inconclusive) baseline_ok=true ;; + esac + case "$new_category" in + pass|fail|inconclusive) new_ok=true ;; + esac + + if [ "$baseline_ok" = true ] && [ "$new_ok" = true ]; then + # Both ran successfully - compare outputs + local baseline_stdout="$BASELINE_DIR/${name}.stdout" + if diff -q "$baseline_stdout" "$new_stdout" >/dev/null 2>&1; then + echo -e " ${GREEN}[SAME]${RESET} $name" + echo "verdict=same" >> "$result_meta" + same=$((same + 1)) + else + # Outputs differ - check if it's an improvement + if [ "$baseline_category" = "fail" ] && [ "$new_category" = "pass" ]; then + echo -e " ${GREEN}[IMPR]${RESET} $name (fail -> pass)" + echo "verdict=improved" >> "$result_meta" + improved=$((improved + 1)) + improved_list="$improved_list $name ($baseline_category -> $new_category)\n" + elif [ "$baseline_category" = "pass" ] && [ "$new_category" = "fail" ]; then + echo -e " ${RED}[REGR]${RESET} $name (pass -> fail)" + echo "verdict=regression" >> "$result_meta" + regression=$((regression + 1)) + regression_list="$regression_list $name ($baseline_category -> $new_category)\n" + else + echo -e " ${BLUE}[DIFF]${RESET} $name (same category: $new_category, different output)" + echo "verdict=different" >> "$result_meta" + different=$((different + 1)) + different_list="$different_list $name 
($baseline_category -> $new_category)\n" + fi + # Store the diff + diff -u "$baseline_stdout" "$new_stdout" > "$RESULTS_DIR/${name}.diff" 2>/dev/null + fi + elif [ "$baseline_ok" = false ] && [ "$new_ok" = true ]; then + # New pipeline succeeds where old one errored + echo -e " ${GREEN}[IMPR]${RESET} $name ($baseline_category -> $new_category)" + echo "verdict=improved" >> "$result_meta" + improved=$((improved + 1)) + improved_list="$improved_list $name ($baseline_category -> $new_category)\n" + elif [ "$baseline_ok" = true ] && [ "$new_ok" = false ]; then + # New pipeline errors where old one succeeded + echo -e " ${RED}[REGR]${RESET} $name ($baseline_category -> $new_category)" + echo "verdict=regression" >> "$result_meta" + regression=$((regression + 1)) + regression_list="$regression_list $name ($baseline_category -> $new_category)\n" + else + # Both errored - compare error categories + if [ "$baseline_category" = "$new_category" ]; then + echo -e " ${YELLOW}[SAME]${RESET} $name (both: $new_category)" + echo "verdict=same" >> "$result_meta" + same=$((same + 1)) + else + echo -e " ${BLUE}[DIFF]${RESET} $name ($baseline_category -> $new_category)" + echo "verdict=different" >> "$result_meta" + different=$((different + 1)) + different_list="$different_list $name ($baseline_category -> $new_category)\n" + fi + fi + done + + echo "" + echo -e "${BOLD}=== Summary ===${RESET}" + echo " Total: $total" + echo -e " Same: ${GREEN}$same${RESET}" + echo -e " Improved: ${GREEN}$improved${RESET}" + echo -e " Different: ${BLUE}$different${RESET}" + echo -e " Regression: ${RED}$regression${RESET}" + echo "" + + if [ -n "$improved_list" ]; then + echo -e "${GREEN}Improvements:${RESET}" + echo -e "$improved_list" + fi + + if [ -n "$different_list" ]; then + echo -e "${BLUE}Differences (non-blocking):${RESET}" + echo -e "$different_list" + fi + + if [ -n "$regression_list" ]; then + echo -e "${RED}REGRESSIONS (blocking):${RESET}" + echo -e "$regression_list" + fi + + # Write 
summary file + cat > "$RESULTS_DIR/summary.txt" <" + + # Normalize: strip .python.st.ion suffix if provided + testname="${testname%.python.st.ion}" + + local test_file="$TEST_DIR/${testname}.python.st.ion" + [ -f "$test_file" ] || die "Test file not found: $test_file" + + local rel_path + rel_path="${test_file#$ROOT_DIR/}" + + echo -e "${BOLD}=== Single Test: $testname ===${RESET}" + echo " File: $rel_path" + echo "" + + # Verify strata binary + [ -x "$STRATA_BIN" ] || die "Strata binary not found at: $STRATA_BIN" + + # Run old pipeline + echo -e "${BOLD}--- Old Pipeline ($OLD_PIPELINE) ---${RESET}" + local old_stdout + old_stdout=$(mktemp) + local old_stderr + old_stderr=$(mktemp) + + run_pipeline "$OLD_PIPELINE" "$rel_path" "$old_stdout" "$old_stderr" + local old_exit=$? + local old_category + old_category=$(classify_result "$old_exit" "$old_stdout") + + echo " Exit code: $old_exit" + echo " Category: $old_category" + echo " Output:" + sed 's/^/ /' "$old_stdout" + if [ -s "$old_stderr" ]; then + echo " Stderr:" + sed 's/^/ /' "$old_stderr" | head -20 + fi + echo "" + + # Run new pipeline + echo -e "${BOLD}--- New Pipeline ($NEW_PIPELINE) ---${RESET}" + local new_stdout + new_stdout=$(mktemp) + local new_stderr + new_stderr=$(mktemp) + + run_pipeline "$NEW_PIPELINE" "$rel_path" "$new_stdout" "$new_stderr" + local new_exit=$? 
+ local new_category + new_category=$(classify_result "$new_exit" "$new_stdout") + + echo " Exit code: $new_exit" + echo " Category: $new_category" + echo " Output:" + sed 's/^/ /' "$new_stdout" + if [ -s "$new_stderr" ]; then + echo " Stderr:" + sed 's/^/ /' "$new_stderr" | head -20 + fi + echo "" + + # Compare + echo -e "${BOLD}--- Comparison ---${RESET}" + if diff -q "$old_stdout" "$new_stdout" >/dev/null 2>&1; then + echo -e " ${GREEN}IDENTICAL output${RESET}" + else + echo -e " ${YELLOW}DIFFERENT output${RESET}" + echo "" + echo " Diff (old vs new):" + diff -u --label="old ($OLD_PIPELINE)" --label="new ($NEW_PIPELINE)" \ + "$old_stdout" "$new_stdout" | sed 's/^/ /' + fi + + # Cleanup + rm -f "$old_stdout" "$old_stderr" "$new_stdout" "$new_stderr" +} + +# --------------------------------------------------------------------------- +# Summary (from stored results) +# --------------------------------------------------------------------------- + +cmd_summary() { + echo -e "${BOLD}=== Stored Results Summary ===${RESET}" + echo "" + + if [ ! -d "$RESULTS_DIR" ]; then + die "No results found. Run '$0 compare' first." 
+ fi + + if [ -f "$RESULTS_DIR/summary.txt" ]; then + cat "$RESULTS_DIR/summary.txt" + echo "" + fi + + # Detailed breakdown by verdict + local same=0 improved=0 regression=0 different=0 + + for meta_file in "$RESULTS_DIR"/*.meta; do + [ -f "$meta_file" ] || continue + local verdict + verdict=$(grep '^verdict=' "$meta_file" | cut -d= -f2) + case "$verdict" in + same) same=$((same + 1)) ;; + improved) improved=$((improved + 1)) ;; + regression) regression=$((regression + 1)) ;; + different) different=$((different + 1)) ;; + esac + done + + echo "Breakdown:" + echo -e " Same: ${GREEN}$same${RESET}" + echo -e " Improved: ${GREEN}$improved${RESET}" + echo -e " Different: ${BLUE}$different${RESET}" + echo -e " Regression: ${RED}$regression${RESET}" + echo "" + + # List regressions + if [ "$regression" -gt 0 ]; then + echo -e "${RED}Regressions:${RESET}" + for meta_file in "$RESULTS_DIR"/*.meta; do + [ -f "$meta_file" ] || continue + local verdict + verdict=$(grep '^verdict=' "$meta_file" | cut -d= -f2) + if [ "$verdict" = "regression" ]; then + local name + name=$(basename "$meta_file" .meta) + local baseline_cat + baseline_cat=$(grep '^baseline_category=' "$meta_file" | cut -d= -f2) + local new_cat + new_cat=$(grep '^category=' "$meta_file" | cut -d= -f2) + echo " $name ($baseline_cat -> $new_cat)" + fi + done + echo "" + fi + + # List improvements + if [ "$improved" -gt 0 ]; then + echo -e "${GREEN}Improvements:${RESET}" + for meta_file in "$RESULTS_DIR"/*.meta; do + [ -f "$meta_file" ] || continue + local verdict + verdict=$(grep '^verdict=' "$meta_file" | cut -d= -f2) + if [ "$verdict" = "improved" ]; then + local name + name=$(basename "$meta_file" .meta) + local baseline_cat + baseline_cat=$(grep '^baseline_category=' "$meta_file" | cut -d= -f2) + local new_cat + new_cat=$(grep '^category=' "$meta_file" | cut -d= -f2) + echo " $name ($baseline_cat -> $new_cat)" + fi + done + echo "" + fi + + if [ "$regression" -gt 0 ]; then + exit 1 + else + exit 0 + fi +} + +# 
--------------------------------------------------------------------------- +# List Tests +# --------------------------------------------------------------------------- + +cmd_list() { + echo -e "${BOLD}=== Available Test Files ===${RESET}" + echo "" + local count=0 + for test_file in $(get_test_files); do + local name + name=$(testname_from_file "$test_file") + echo " $name" + count=$((count + 1)) + done + echo "" + echo "Total: $count test files" +} + +# --------------------------------------------------------------------------- +# Main Dispatch +# --------------------------------------------------------------------------- + +case "${1:-}" in + baseline) + cmd_baseline + ;; + compare) + shift + cmd_compare "$@" + ;; + single) + shift + cmd_single "$@" + ;; + summary) + cmd_summary + ;; + list) + cmd_list + ;; + --help|-h|help) + usage + ;; + *) + usage + ;; +esac diff --git a/StrataTest/Languages/Python/re_stub.py b/StrataTest/Languages/Python/re_stub.py new file mode 100644 index 0000000000..6a91b70a2e --- /dev/null +++ b/StrataTest/Languages/Python/re_stub.py @@ -0,0 +1,13 @@ +"""Regex module stub for Resolution.""" + +def fullmatch(pattern: str, string: str): + pass + +def match(pattern: str, string: str): + pass + +def search(pattern: str, string: str): + pass + +def compile(pattern: str): + pass diff --git a/StrataTest/Languages/Python/tests/test_class_field_any.py b/StrataTest/Languages/Python/tests/test_class_field_any.py index 26a0316dcc..3fde088fdc 100644 --- a/StrataTest/Languages/Python/tests/test_class_field_any.py +++ b/StrataTest/Languages/Python/tests/test_class_field_any.py @@ -1,4 +1,5 @@ class MyClass: + some_field: Any def __init__(self, some_field): self.some_field: Any = some_field diff --git a/StrataTest/Languages/Python/tests/test_class_field_any.python.st.ion b/StrataTest/Languages/Python/tests/test_class_field_any.python.st.ion new file mode 100644 index 0000000000..bd21547fbf Binary files /dev/null and 
b/StrataTest/Languages/Python/tests/test_class_field_any.python.st.ion differ diff --git a/StrataTest/Languages/Python/tests/test_class_field_init.py b/StrataTest/Languages/Python/tests/test_class_field_init.py index e6d6bc66e6..00f5f92e61 100644 --- a/StrataTest/Languages/Python/tests/test_class_field_init.py +++ b/StrataTest/Languages/Python/tests/test_class_field_init.py @@ -2,6 +2,7 @@ class CircularBuffer: name: str + size: int def __init__(self, size: int, name: str): self.size: int = size self.name = name diff --git a/StrataTest/Languages/Python/tests/test_class_field_init.python.st.ion b/StrataTest/Languages/Python/tests/test_class_field_init.python.st.ion new file mode 100644 index 0000000000..39edbb2ebe Binary files /dev/null and b/StrataTest/Languages/Python/tests/test_class_field_init.python.st.ion differ diff --git a/StrataTest/Languages/Python/tests/test_class_field_use.py b/StrataTest/Languages/Python/tests/test_class_field_use.py index 543aa36e2a..ef285d0a92 100644 --- a/StrataTest/Languages/Python/tests/test_class_field_use.py +++ b/StrataTest/Languages/Python/tests/test_class_field_use.py @@ -4,6 +4,7 @@ from typing import List class CircularBuffer: + buffer: int def __init__(self, n: int): print("Hi") self.buffer : int = n diff --git a/StrataTest/Languages/Python/tests/test_class_field_use.python.st.ion b/StrataTest/Languages/Python/tests/test_class_field_use.python.st.ion new file mode 100644 index 0000000000..8cc2a05af0 Binary files /dev/null and b/StrataTest/Languages/Python/tests/test_class_field_use.python.st.ion differ diff --git a/StrataTest/Languages/Python/tests/test_class_methods.py b/StrataTest/Languages/Python/tests/test_class_methods.py index 3fc24f4ecf..6dd981c4ec 100644 --- a/StrataTest/Languages/Python/tests/test_class_methods.py +++ b/StrataTest/Languages/Python/tests/test_class_methods.py @@ -1,6 +1,8 @@ import test_helper class Account: + owner: str + balance: int def __init__(self, owner: str, balance: int): self.owner: str = 
owner self.balance: int = balance diff --git a/StrataTest/Languages/Python/tests/test_class_methods.python.st.ion b/StrataTest/Languages/Python/tests/test_class_methods.python.st.ion new file mode 100644 index 0000000000..3be30b65a6 Binary files /dev/null and b/StrataTest/Languages/Python/tests/test_class_methods.python.st.ion differ diff --git a/StrataTest/Languages/Python/tests/test_class_with_methods.py b/StrataTest/Languages/Python/tests/test_class_with_methods.py index 65d6cdfe76..e74cef750d 100644 --- a/StrataTest/Languages/Python/tests/test_class_with_methods.py +++ b/StrataTest/Languages/Python/tests/test_class_with_methods.py @@ -1,6 +1,8 @@ import test_helper class DataStore: + name: str + count: int def __init__(self, name: str): self.name: str = name self.count: int = 0 diff --git a/StrataTest/Languages/Python/tests/test_class_with_methods.python.st.ion b/StrataTest/Languages/Python/tests/test_class_with_methods.python.st.ion new file mode 100644 index 0000000000..606123a44c Binary files /dev/null and b/StrataTest/Languages/Python/tests/test_class_with_methods.python.st.ion differ diff --git a/StrataTest/Languages/Python/tests/test_with_statement.py b/StrataTest/Languages/Python/tests/test_with_statement.py index 0c07661240..373fa30674 100644 --- a/StrataTest/Languages/Python/tests/test_with_statement.py +++ b/StrataTest/Languages/Python/tests/test_with_statement.py @@ -1,6 +1,8 @@ class Resource: + value: int + def __init__(self, n: int): - self.value : int = n + self.value = n def __enter__(self) -> int: return self.value diff --git a/StrataTest/Languages/Python/tests/test_with_void_enter.py b/StrataTest/Languages/Python/tests/test_with_void_enter.py index fe34235e82..028d4c6c37 100644 --- a/StrataTest/Languages/Python/tests/test_with_void_enter.py +++ b/StrataTest/Languages/Python/tests/test_with_void_enter.py @@ -1,6 +1,8 @@ class VoidManager: + active: bool + def __init__(self): - self.active: bool = True + self.active = True def __enter__(self): 
self.active = True diff --git a/docs/elaborator_audit.md b/docs/elaborator_audit.md new file mode 100644 index 0000000000..4cb60fcd6c --- /dev/null +++ b/docs/elaborator_audit.md @@ -0,0 +1,63 @@ +# Elaborator Full Audit + +Every instance where the code deviates from the architecture. + +## Catch-alls returning Any + +1. **Line 80**: `instance : Inhabited FuncSig` — default has `returnType := .TCore "Any"`. Lean requires `Inhabited` but this default should never be used. + +2. **Line 90**: `instance : Inhabited NameInfo` — default is `.variable (.TCore "Any")`. Same — Lean requirement, should never be reached. + +3. **Line 219**: `eraseType` — `.TSet _ | .TMap _ _ | .Applied _ _ | .Intersection _ | .Unknown => .TCore "Any"`. These types shouldn't appear in well-typed Laurel from Translation. Should fail. + +4. **Line 539**: `lookupProcOutputs` — `env.procGrades[callee]?.getD .pure`. Guesses `pure` if grade not found. Should fail. + +5. **Line 553**: `lookupProcOutputs` — `| none => pure [("result", .TCore "Any")]`. Invents fake output if callee not found. Should fail. + +6. **Line 594**: `synthValueVar` — `| some (.function sig) => pure (.var md id.text, eraseType sig.returnType)`. Functions shouldn't be referenced as values unless they're being called. Unclear if this is correct or a hack. + +7. **Line 595**: `synthValueVar` — `| _ => pure (.var md id.text, .TCore "Any")`. Unknown name returns Any. Should fail. + +8. **Line 626**: `synthValueFieldSelect` — `ft.getD (.TCore "Any")`. Field type lookup returned none. Should fail. + +9. **Line 634**: `synthValueFieldSelect` — when `resolveFieldOwner` returns `none`, emits havoc with `Any`. Should fail (object type should tell us the class). + +10. **Line 652**: `synthValueStaticCall` — `(← read).procGrades[callee.text]?.getD .pure`. Guesses pure if grade not found. Should fail. + +11. 
**Line 659-661**: `synthValueStaticCall` — `| none => let checkedArgs ← args.mapM fun arg => checkValue arg (.TCore "Any"); pure (.staticCall md callee.text checkedArgs, .TCore "Any")`. Unknown callee returns Any. Should fail. + +12. **Line 684-687**: `checkArgValues` — `| arg :: rest, [] => do let v ← checkValue arg (.TCore "Any")`. Extra args beyond params checked against Any. Should fail (arity mismatch). + +13. **Line 861**: `elaborateCall` — `(← read).procGrades[callee.text]?.getD .pure`. Same as #4/#10. Guesses pure. + +14. **Line 948-953**: `bindArgs` — `| arg :: restArgs, [] => ... (.TCore "Any")`. Extra args beyond params. Should fail. + +## Option-returning lookups that should fail + +15. **Line 400**: `lookupEnv` returns `Option NameInfo`. Should return `NameInfo` and fail if not found. + +16. **Line 403-404**: `lookupFuncSig` returns `Option FuncSig`. Should return `FuncSig` and fail. + +17. **Line 405-408**: `lookupFieldType` returns `Option HighType`. Should return `HighType` and fail. + +## Structurally wrong + +18. **Line 409-412**: `resolveFieldOwner` — global scan by field name instead of using the object's synthesized type. + +19. **Line 910**: `checkProducer` `.Assign` multi-target — `| _ => checkProducers rest retTy grade`. Silently drops multi-target assignment. Should fail. + +20. **Line 913**: `checkProducer` `.New` — `failure`. Should be implemented (at least for assignment context — bare new is pathological). + +21. **Line 928-931**: `checkProducer` catch-all — emits havoc with `Any`. Should be `produce(checkValue stmt retTy)` for value expressions, `failure` for unsupported forms. + +22. **Line 993**: `checkAssignFieldWrite` — `guard (Grade.leftResidual .heap grade |>.isSome)`. Grade check belongs in subgrading, not here. + +## Missing from architecture + +23. 
`checkProducer` is not total — no explicit cases for: `LiteralInt`, `LiteralBool`, `LiteralString`, `LiteralDecimal`, `Identifier`, `FieldSelect`, `PureFieldUpdate`, `PrimitiveOp`, `This`, `ReferenceEquals`, `AsType`, `IsType`, `Forall`, `Exists`, `Assigned`, `Old`, `Fresh`, `ProveBy`, `ContractOf`, `Return`, `InstanceCall`. + +24. `checkProducerStaticCall` — no derivation tree previously (now fixed). + +25. `checkAssignVar` — derivation exists but code was using `checkValue` (now fixed to `checkProducer`). + +26. Doc `checkProducer` case list incomplete (now partially fixed but still references stale items). diff --git a/docs/elaborator_test_analysis.md b/docs/elaborator_test_analysis.md new file mode 100644 index 0000000000..566e308052 --- /dev/null +++ b/docs/elaborator_test_analysis.md @@ -0,0 +1,433 @@ +# Elaborator Test Divergence Analysis + +Comparing old pipeline (`pyAnalyzeLaurel`) vs new pipeline (`pyAnalyzeV2`). +55 tests total. 14 SAME, 2 IMPROVED, 36 DIFF, 2 REGRESSION. + +## Root Causes + +### A. Import resolution failure +The new Resolution pass doesn't load procedure specifications from imported modules. +When `from datetime import datetime, timedelta` or `import re` appears, the old pipeline +loads full procedure declarations (with requires/ensures) for `datetime_now`, `timedelta_func`, +`re_fullmatch`, `re_match`, `re_search`, etc. The new pipeline marks these as unresolved, +and Translation emits havocs. + +### B. Same-file procedure call resolution failure +Even for procedures defined in the same file, the new Resolution sometimes fails to +resolve calls. `test_helper_procedure` defined at the top of a file, called later — +the old pipeline resolves it; the new one emits a havoc. + +### C. `new` expansion removes Core translator pattern +The old pipeline emits `my_buf := new CircularBuffer; CircularBuffer@__init__(my_buf, args)` +which the Laurel-to-Core translator recognizes and generates `callElimAssert_requires` VCs for. 
+The new elaborator correctly expands this to `increment($heap); MkComposite(...); __init__(...)` — +explicit heap semantics per FGCBV. The Core translator doesn't recognize this expanded form. + +### D. `propertySummary` / `ensures` not carried through +The old pipeline copies user-written `requires` and `ensures` annotations onto procedure +declarations, with `propertySummary` labels. The new pipeline emits procedure declarations +without these annotations. This causes all precondition-checking VCs and return-type +constraint VCs to disappear. + +### E. Type erasure too aggressive (DictStrAny -> Composite) +The new elaborator erases `DictStrAny` annotations to `Composite` (because user-defined types +erase to Composite). This inserts `from_Composite(...)`/`Any..as_Composite!(...)` wrapping +around dict operations that cvc5 can't see through, making previously-provable asserts go unknown. + +--- + +## Per-Test Analysis + +### test_arithmetic — SAME +No divergence. + +### test_augmented_assign — SAME +No divergence. + +### test_boolean_logic — SAME +No divergence. + +### test_break_continue — DIFF (more principled) +**Old:** 11 procs, 8 ensures. Each function has `ensures Any..isfrom_None(LaurelResult)`. +Body starts with `LaurelResult := from_None(); var nullcall_ret := from_None(); var maybe_except := NoError()`. + +**New:** 7 procs, 3 ensures. Functions return `(LaurelResult: void, ...)`. No boilerplate. +Body is just the logic. + +**Verdict: More principled.** The old `ensures Any..isfrom_None(LaurelResult)` is a tautology — +it initializes LaurelResult to from_None and never changes it. The new pipeline correctly types +the return as void and doesn't emit a trivially-true ensures. The missing "Return type constraint" +VCs were vacuous. Loop logic is identical. + +### test_class_decl — DIFF (more principled, downstream gap) +**Old:** `my_buf := new CircularBuffer; CircularBuffer@__init__(my_buf, from_int(5))`. +`__init__` has `requires true`. 
Core translator emits `callElimAssert_requires_4`. + +**New:** `heap$0 := increment($heap); my_buf := MkComposite(Heap..nextReference!($heap), CircularBuffer_TypeTag()); CircularBuffer@__init__(from_Composite(my_buf), 5)`. + +**Verdict: More principled.** Explicit heap allocation is the correct FGCBV semantics. The old +`new` keyword hides what's actually happening. Lost VC (`callElimAssert_requires_4`) is because +the Core translator pattern-matches on `new` which no longer appears. Root cause C. + +### test_class_field_any — DIFF (more principled, downstream gap) +**Old:** 7 procs, `new` present, `callElimAssert_requires_3` emitted. +**New:** 2 procs, `new` expanded to heap ops. + +**Verdict: More principled.** Same as test_class_decl — root cause C. The lost VC was for the +`new` + `__init__` pattern. Both old and new fail to prove `assert(133)` (inconclusive in both). + +### test_class_field_init — DIFF (more principled, downstream gap) +**Old:** 9 procs, `new` present, `callElimAssert_requires_5` + `postcondition`. +**New:** 5 procs, `new` expanded. + +**Verdict: More principled.** Root cause C. Same pattern — heap expansion removes `new` pattern. + +### test_class_field_use — DIFF (more principled, downstream gap) +**Old:** 10 procs, `new` present, `callElimAssert_requires_8` + `postcondition`. +**New:** 6 procs, `new` expanded. + +**Verdict: More principled.** Root cause C. The assert `assert(301)` is inconclusive in BOTH +pipelines — the lost VCs were only for the class instantiation pattern. + +### test_class_methods — DIFF (wrong — specs + resolution) +**Old:** 12 procs, req 8, `callElimAssert_requires_12`, `Origin_test_helper_procedure_Requires` checked. +**New:** 7 procs, req 1. Method calls (`Account@__init__`, `Account@get_owner`, `Account@get_balance`, +`Account@set_balance`) ARE correctly resolved and called with heap threading. But +`test_helper_procedure(from_str("foo"), from_None())` at end of main becomes `havoc$16`. 
+ +**Verdict: WRONG.** Root cause B (same-file `test_helper_procedure` not resolved — it's defined +at module level but Resolution can't find it) PLUS root cause C (`new` expanded — correct) +PLUS root cause D (no requires/ensures on any procedure). The method calls work; the standalone +procedure call and all specs are lost. + +### test_class_with_methods — DIFF (wrong — specs + resolution) +**Old:** 12 procs, req 8. +**New:** 7 procs, req 1. Only 2 actual havocs: `havoc$0()` for `__name__` (standard) and +`havoc$16` for `test_helper_procedure` (unresolved). All `<?>` are output var declarations. + +**Verdict: WRONG.** Same pattern as test_class_methods. Method calls on class instances are +correctly resolved. `test_helper_procedure` standalone call becomes havoc (root cause B). +All specs missing (root cause D). `new` correctly expanded (root cause C, principled). + +### test_comparisons — SAME +No divergence. + +### test_composite_return — DIFF (more principled, downstream gap) +**Old:** 8 procs, `new` present, `callElimAssert_requires_3` + `postcondition`. +**New:** 3 procs, no `new`, no requires, no ensures. + +**Verdict: More principled.** Root cause C. The old pipeline emitted these VCs from the `new` +pattern; the new correctly expands. No functional logic difference. + +### test_control_flow — SAME +No divergence. + +### test_datetime — DIFF (wrong) +**Old:** 7 procs including `datetime_now()` and `timedelta_func(days, hours)` with full +requires/ensures. `now := datetime_now()` gives cvc5 `ensures Any..isfrom_datetime(ret)`. +**New:** 1 proc. All datetime/timedelta calls become havocs. 0 ensures. + +**Verdict: WRONG.** Root cause A. `from datetime import datetime, timedelta` not resolved. +The entire test becomes meaningless — all asserts go unknown because cvc5 has no information +about what `now` or `delta` contain. + +### test_default_params — DIFF (wrong — specs only) +**Old:** 7 procs, req 5, ens 3, specs 6.
`greet` has `requires Any..isfrom_str(name)` + +`ensures Any..isfrom_str(LaurelResult)`. `power` same pattern. +**New:** 5 procs, req 1, ens 1, specs 0. `greet` and `power` exist with correct bodies. +Calls `greet("Alice", "Hello")` and `power(3, 2)` are correctly resolved. Only havoc is +the standard `havoc$0()` for `__name__`. No resolution failures. + +**Verdict: WRONG.** Root cause D only (NOT A). All calls resolve correctly. The user-written +type constraints and return type ensures are not propagated to output declarations. + +### test_dict_operations — DIFF (more principled, less precise) +**Old:** `config: Core(Any)`. Direct `Any_get(config, ...)`. +**New:** `config: Core(Composite)`. `Any_get(from_Composite(config), ...)` with wrapping. + +Both have identical structure, same function calls, same asserts. Same requires/ensures counts. +But the new pipeline types `config` as `Composite` (because `dict` annotation erases to it), +then wraps every access in `from_Composite(...)`. cvc5 can't simplify +`Any_get(from_Composite(Any..as_Composite!(from_DictStrAny(...))), key)` because `from_Composite` +and `as_Composite!` are opaque. + +**Verdict: More principled but less precise.** Root cause E. The type erasure is technically +correct (dict is a user-defined type → Composite) but produces opaque wrapping that blocks +verification. Fix: don't erase DictStrAny to Composite in `eraseType`. + +### test_for_loop — DIFF (more principled) +**Old:** 7 procs, 13 havocs. **New:** 4 procs, 6 havocs. + +**Verdict: More principled.** New has FEWER havocs (6 vs 13). Same requires/ensures. The new +pipeline is actually better here — fewer opaque values. The difference is structural (no +boilerplate procs, no `nullcall_ret`/`maybe_except` initialization). + +### test_fstrings — SAME +No divergence. + +### test_func_input_type_constraints — DIFF (wrong — specs only) +**Old:** 10 procs, req 14, ens 7, specs 10. Full type constraints on function inputs. 
+**New:** 8 procs, req 7, ens 4, specs 0. All procedures (`Mul`, `Sum`, `List_Dict_index`) exist +with correct bodies. Zero havocs. Calls are correctly resolved. + +**Verdict: WRONG.** Root cause D. User-written type constraints (`requires Any..isfrom_str(x)`) +on function parameters are not propagated to the output procedure declarations. The specs +(propertySummary) are entirely lost. This means the verifier can't check type safety at +call sites. + +### test_function_def_calls — DIFF (wrong) +**Old:** `test_helper_procedure` with 3 requires, `my_f` with 1 requires. Call site checks generated. +**New:** `test_helper_procedure` doesn't exist. `my_f` body is a single havoc. + +**Verdict: WRONG.** Root cause B. Same-file procedure `test_helper_procedure` not resolved. +The call inside `my_f` becomes a havoc. All precondition VCs lost. + +### test_if_elif — DIFF (wrong — specs only) +**Old:** `classify` has `requires Any..isfrom_int(x)` + `ensures Any..isfrom_str(LaurelResult)`. +Call `classify(PNeg(from_int(5)))` is resolved. Same in new: `classify(Any..as_int!(PNeg(from_int(5))))`. + +**New:** `classify` exists, calls are resolved correctly. But no `requires`/`ensures` on it. +cvc5 can't infer that the return is a string, so downstream asserts go unknown. + +**Verdict: WRONG.** Root cause D only (NOT B — I was wrong before). Calls are resolved. Specs not propagated. + +### test_ifexpr — DIFF (naming only) +**Old:** `set_result_calls_Any_to_bool_0`. **New:** `ite_cond_calls_Any_to_bool_0`. + +**Verdict: Fine.** Same VC, different name. The old pipeline names it after the assignment target, +the new names it after the if-expression condition. Semantically identical. + +### test_list_slice — SAME +No divergence. + +### test_list — SAME +No divergence. + +### test_loops — DIFF (more principled) +**Old:** 8 procs, req 8, ens 4, 13 havocs. **New:** 5 procs, req 8, ens 4, 2 havocs. + +**Verdict: More principled.** Same requires/ensures counts. 
New has FEWER havocs (2 vs 13) +and fewer procs (no boilerplate). The verification results should be equivalent or better. + +### test_method_call_with_kwargs — DIFF (more principled, downstream gap) +**Old:** 8 procs, `new` present, `callElimAssert_requires_6`. +**New:** 3 procs, no `new`. + +**Verdict: More principled.** Root cause C. Same as other class tests — `new` expanded. + +### test_method_param_reassign — SAME +No divergence. + +### test_missing_models — DIFF (wrong — import resolution + specs) +**Old:** 9 procs, req 9, ens 5, specs 4. User procs (`math_stuff`, `string_stuff` etc.) present. +**New:** 6 procs, req 6, ens 4, specs 0. No user procs — they use imported types/calls +that aren't resolved. `foo := havoc$0` (class instantiation), `response := havoc$1` (method call). + +**Verdict: WRONG.** Root causes A+B+D. The test uses `from foo import Foo` and calls methods +on imported class instances. Resolution doesn't load the import. Plus specs not propagated. + +### test_module_level — SAME +No divergence. + +### test_multi_function — DIFF (wrong — specs only) +**Old:** 12 procs, req 16, ens 7, specs 11. +**New:** 9 procs, req 8, ens 4, specs 0. `create_config`, `validate_config`, `process_config` +all present as procedures with correct bodies. Calls between them are resolved. + +**Verdict: WRONG.** Root cause D only (NOT B). All same-file procedures are resolved. +The requires/ensures/propertySummary annotations are not propagated to the output. + +### test_multiple_except — DIFF (more principled) +**Old:** 7 procs, 9 havocs. **New:** 3 procs, 4 havocs. Same req/ens. + +**Verdict: More principled.** Fewer procs, fewer havocs, same constraints. The new pipeline +produces tighter output. + +### test_nested_calls — DIFF (wrong — specs only) +**Old:** `double` has `requires Any..isfrom_int(x)` + `ensures Any..isfrom_int(LaurelResult)`. +`add_one` same. Call `double(3)`, `add_one(a)` etc. correctly resolved in both. 
+ +**New:** `double($in_x: int)` and `add_one($in_x: int)` exist. Calls are `double(3)`, +`add_one(a)` — correctly resolved, NOT havocs. But no requires/ensures. + +**Verdict: WRONG.** Root cause D only (NOT B — I was wrong before). All calls resolve +correctly. The issue is purely that specs are not propagated to output declarations. + +### test_optional_param_default — DIFF (wrong) +**Old:** 6 procs, req 5, ens 3. `timedelta_func` present with requires. +**New:** 3 procs, req 1, ens 1. No `timedelta_func`. + +**Verdict: WRONG.** Root cause A. Import not resolved. `timedelta` calls become havocs. + +### test_pin_any — DIFF (more principled) +**Old:** 5 procs, 1 havoc. **New:** 2 procs, 0 havocs. Same req. + +**Verdict: More principled.** Fewer procs, zero havocs. Cleaner output. + +### test_power — SAME +No divergence. + +### test_precondition_verification — DIFF (wrong) +**Old:** 6 procs, req 4, `Origin_test_helper_procedure_Requires` checked at call sites. +**New:** 3 procs, req 1, +3 havocs. + +**Verdict: WRONG.** Root cause B. `test_helper_procedure` not resolved. Its preconditions +never get checked at call sites. + +### test_procedure_in_assert — DIFF (wrong) +**Old:** 6 procs, req 5, ens 3. `timedelta_func` present. +**New:** 3 procs, req 1, ens 1. + +**Verdict: WRONG.** Root cause A. Import not resolved. + +### test_regex_negative — DIFF (wrong) +**Old:** 5 procs, req 5, 5 havocs. **New:** 3 procs, req 1, 54 havocs. + +**Verdict: WRONG.** Root cause A. `import re` not resolved. Every `re.fullmatch`/`re.search` +call (there are many) becomes a havoc. 5 → 54 havocs. + +### test_regex_positive — DIFF (wrong) +**Old:** 5 procs, req 5, 4 havocs. **New:** 3 procs, req 1, 288 havocs. + +**Verdict: WRONG.** Root cause A. Same as regex_negative but bigger test. 4 → 288 havocs. +Every regex call is a havoc. + +### test_return_types — DIFF (wrong — specs only) +**Old:** 10 procs, req 3, ens 6, specs 7. Each function has `ensures` for return type. 
+**New:** 8 procs, req 1, ens 1, specs 0. All functions (`get_number`, `get_greeting`, +`get_flag`, `get_nothing`, `add`) exist with correct bodies. Only havoc is `havoc$0()` for `__name__`. + +**Verdict: WRONG.** Root cause D only (NOT B). All procedures resolved. Return type ensures +and type constraint requires not propagated. + +### test_strings — SAME +No divergence. + +### test_subscription — SAME +No divergence. + +### test_timedelta_expr — DIFF (wrong) +**Old:** 6 procs, `timedelta_func` with requires/ensures. `now := datetime_now()`. +**New:** 1 proc. Both calls are havocs. + +**Verdict: WRONG.** Root cause A. Import not resolved. + +### test_try_except_scoping — DIFF (more principled, duplicate emission bug) +**Old:** 7 procs, 6 VCs total. **New:** 3 procs, 27 VCs (same asserts repeated many times). + +**Verdict: More principled structure** (same try/except logic, no boilerplate) **BUT has a +duplicate emission bug.** The same `assert(355)` check is emitted 8+ times. This is a +Translation or elaboration bug where try/except block scoping causes repeated VC generation. +Not an architectural problem — just a bug in how labeled blocks are duplicated. + +### test_try_except — DIFF (more principled) +**Old:** 7 procs. **New:** 3 procs. Same req. + +**Verdict: More principled.** Fewer procs, same constraints. Try/except structure preserved. + +### test_unsupported_config — IMPROVED (internal_error -> pass) +Old pipeline crashed. New pipeline succeeds. + +### test_user_error_metadata — IMPROVED (user_error -> pass) +Old pipeline reported a user error. New pipeline succeeds. + +### test_variable_in_nested_block — SAME +No divergence. + +### test_variable_reassign — DIFF (more principled) +**Old:** 5 procs, 6 havocs. **New:** 3 procs, 4 havocs. Same req/ens. + +**Verdict: More principled.** Fewer havocs, fewer boilerplate procs. + +### test_while_loop — DIFF (more principled) +**Old:** 7 procs, ens 4, 6 havocs. **New:** 4 procs, ens 1, 0 havocs. 
+ +**Verdict: More principled.** Zero havocs vs 6. Fewer boilerplate ensures (return type +constraints that were tautologies). Core loop logic identical. + +### test_with_statement — DIFF (more principled + downstream gap) +**Old:** 13 procs, `new` x4, req 5, 12 ``. +**New:** 8 procs, no `new`, req 1, 35 `` (all are output var declarations for effectful calls, not unresolved). + +`Resource@__init__`, `Resource@__enter__`, `Resource@__exit__`, `Resource@get_value` are all +present in new output. The `with` statement is correctly desugared into `__enter__`/`__exit__` +calls with explicit heap threading. Zero actual havocs in the new output. + +The +23 `` are output variable declarations: every `($heap$N, LaurelResult$N, maybe_except$N) := call(...)` +requires declaring those 3 outputs first. This is the correct elaboration calling convention. + +**Verdict: More principled.** Root causes C (new expanded, correct) + D (specs not propagated). +NOT import resolution failure — all calls resolve correctly. + +### test_foo_client_folder — REGRESSION (pass -> internal_error) +**Old:** Passes with VCs. +**New:** `Cannot infer the type of this operation: $field.__name__` — type checking error. + +The elaborator's `synthValueFieldSelect` can't resolve `__name__` as a field on any class. +`resolveFieldOwner` returns `none`. The old pipeline handled this differently (either through +a different resolution path or by not attempting field-level elaboration on dunder attributes). + +**Verdict: REGRESSION.** Bug in elaboration: dunder attributes (`__name__`, `__class__`, etc.) +on objects don't belong to any class in `classFields`. Need a fallback that doesn't crash. + +### test_invalid_client_type — REGRESSION (pass -> internal_error) +Same root cause as test_foo_client_folder — `$field.__name__` or similar dunder attribute +access that the elaborator can't resolve. + +**Verdict: REGRESSION.** Same fix needed. 
+ +### test_with_void_enter — DIFF (more principled + downstream gap) +**Old:** 10 procs, `new` present, `callElimAssert_requires_8/5/2`, `postcondition`. +**New:** 4 procs, no `new`. + +**Verdict: More principled.** Root cause C. `new` correctly expanded. Lost VCs are from the +Core translator not recognizing the expanded form. + +--- + +## Summary Table + +| Verdict | Count | Tests | +|---------|-------|-------| +| SAME | 14 | arithmetic, augmented_assign, boolean_logic, comparisons, control_flow, fstrings, list_slice, list, method_param_reassign, module_level, power, strings, subscription, variable_in_nested_block | +| More principled | 13 | break_continue, class_decl, class_field_any, class_field_init, class_field_use, composite_return, for_loop, loops, multiple_except, pin_any, try_except, variable_reassign, while_loop | +| Naming difference only | 1 | ifexpr | +| More principled + downstream gap | 3 | with_void_enter, with_statement, method_call_with_kwargs | +| More principled + less precise | 1 | dict_operations | +| More principled + dup bug | 1 | try_except_scoping | +| WRONG (import resolution) | 6 | datetime, timedelta_expr, regex_positive, regex_negative, optional_param_default, procedure_in_assert | +| WRONG (same-file resolution) | 2 | function_def_calls, precondition_verification | +| WRONG (specs not propagated) | 8 | default_params, func_input_type_constraints, return_types, if_elif, nested_calls, multi_function, class_methods, class_with_methods | +| WRONG (multiple causes) | 1 | missing_models (A+B+D) | +| IMPROVED | 2 | unsupported_config, user_error_metadata | +| REGRESSION (internal error) | 2 | foo_client_folder, invalid_client_type | + +Note: class_methods and class_with_methods have both same-file resolution failure +(for `test_helper_procedure`) AND spec propagation failure. They're categorized under +specs because that's the dominant issue — the resolution failure affects only one call +at the end of main. + +## Priority Fixes + +1. 
**Spec propagation** (9 tests, highest impact): The new pipeline produces correct procedure + bodies but strips all `requires`/`ensures`/`propertySummary` annotations. This is the single + largest source of verification precision loss. These specs come from Python type annotations + and user-written preconditions — the old pipeline's Translation pass emits them. The new + Translation or Elaboration drops them. Fix: ensure `fullElaborate` preserves + `preconditions`/`determinism`/output specs from the input Laurel procedures. + +2. **Import resolution** (7 tests, counting `missing_models`): Load module procedure specs when processing `import` / + `from ... import`. Without this, all calls to imported functions become havocs. + +3. **Same-file procedure resolution** (3 tests): `test_helper_procedure` defined at module level + can't be resolved when called from within functions. Resolution likely processes function + bodies before all top-level defs are registered. + +4. **DictStrAny erasure** (1 test): Don't erase `DictStrAny` to `Composite` in `eraseType`. + Keep it as `DictStrAny`. The round-trip `from_Composite(Any..as_Composite!(...))` is opaque to cvc5. + +5. **Try/except duplication** (1 test): Fix duplicate VC emission in labeled block handling. + +6. **Core translator pattern** (nice-to-have): Teach the Core translator to emit + `callElimAssert_requires` for the expanded `increment + MkComposite + __init__` pattern. + Not required for soundness. 
diff --git a/docs/verso/PythonDoc.lean b/docs/verso/PythonDoc.lean new file mode 100644 index 0000000000..71729b83a2 --- /dev/null +++ b/docs/verso/PythonDoc.lean @@ -0,0 +1,1041 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ + +import VersoManual + +import Strata.Languages.Python.Resolution +import Strata.Languages.Python.Translation +import Strata.Languages.FineGrainLaurel.Elaborate + +open Strata.Python.Resolution +open Strata.Python.Translation +open Strata.FineGrainLaurel + +-- This gets access to most of the manual genre +open Verso.Genre Manual + +-- This gets access to Lean code that's in code blocks, elaborated in +-- the same process and environment as Verso +open Verso.Genre.Manual.InlineLean + +set_option pp.rawOnError true + +#doc (Manual) "The Python to Laurel Translation Pipeline" => +%%% +shortTitle := "Python Pipeline" +%%% + +# The Problem + +The Laurel-to-Core translator expects Laurel programs where: + +- Every name is resolved (no ambiguous references) +- Every call site has known arity and types +- Arguments to calls are values (not effectful expressions) +- Effects are explicit via calling conventions (heap threading, error outputs) + +Python gives us none of this. Names are ambiguous, scoping is implicit, +arguments can be arbitrary expressions (including effectful calls), and +effects are entirely implicit. + +# The Solution + +Three passes, each establishing invariants that the next pass relies on: + +``` +Array (Python.stmt SourceRange) (raw, unscoped) + | [Resolution] + v +ResolvedPythonProgram (every name disambiguated, annotated with NodeInfo) + | [Translation] + v +Laurel.Program (valid Laurel, but effects implicit, args may be producers) + | [Elaboration] + v +Laurel.Program (effects explicit, args are values — ready for Core) +``` + +_Resolution_ disambiguates names. Its output guarantees: every reference +is annotated with what it refers to (variable, function, class, method). 
+Translation cannot emit an undefined reference because it only uses +identifiers that Resolution produced. + +_Translation_ desugars Python surface syntax into Laurel. Its output +guarantees: valid Laurel structure (procedures, types, statements). But +it does NOT guarantee that effects are explicit or that arguments are +values — it translates Python structure directly. + +_Elaboration_ makes effects explicit. Its output guarantees: arguments +to calls are values, effectful calls have their outputs bound via the +calling convention, heap/error threading is explicit. This is what +Laurel-to-Core expects. + +## Engineering Principles + +:::table +header + * + * Principle + * What it eliminates + * + * Illegal states unrepresentable + * Undefined references, invalid calls + * + * Proof-relevant elimination + * Boolean blindness (no `isResolved` followed by separate lookup) + * + * Phase distinction + * Mixing scoping data with target-language identifiers + * + * Folds + * Ad-hoc traversal choices + * + * Correct by construction + * Post-hoc rewrites, defensive checks +::: + +# Resolution +%%% +tag := "resolution" +%%% + +Resolution is a fold over the Python AST that threads a growing context as +accumulator. Each declaration extends the context; each reference is looked +up in the current context and annotated with the result. The output is the +same AST with a `NodeInfo` on every node — the scoping derivation for the +program. + +## What Resolution Produces + +The annotation on each node tells Translation exactly what to do: + +- Name use of a bound local/param → `.variable name` (Translation emits a bare + identifier). `.variable` means a BOUND variable and nothing else. 
+- Function call → `.funcCall sig` (sig carries everything needed for emission) +- Class instantiation → `.classNew className initSig` +- Method call → `.funcCall sig` (sig has `className = some _` for qualification) +- Attribute access on a value → `.attribute name` (bare field name; Elaboration + resolves later). On a module/unresolved object → `.unresolved` (→ hole). +- Operators → `.funcCall sig` (operators are runtime procedures with correct arity) +- Unresolvable → `.unresolved` (Translation emits Hole) +- Non-reference → `.irrelevant` + +A function, overloaded function, or class name used in VALUE position (not as a +call callee) — e.g. `str` in `isinstance(x, str)`, or `MyClass` assigned to a +variable — resolves to `.unresolved`, not `.variable`. Laurel has no first-class +function or class values, so there is no bound identifier to emit; Translation +turns it into a hole. This is the saturation invariant: every name the elaborator +sees is either a bound `.variable` or has been turned into a hole upstream. The +elaborator, by definition, operates on well-scoped Laurel and never receives a +name it cannot bind. (Call sites are unaffected: a call computes its own +`.funcCall`/`.classNew` from the callee, independent of the callee name's +value-position annotation.) + +{docstring Strata.Python.Resolution.NodeInfo} + +This is proof-relevant elimination: pattern matching on `NodeInfo` gives you +the data you need AND determines your action. There is no +`isResolved : String -> Bool` followed by a separate lookup. The annotation +IS the resolution. + +## The Phase Boundary + +All Resolution types are purely Python-level. No `Laurel.Identifier` appears +anywhere in Resolution's output. This is enforced by a newtype: + +{docstring Strata.Python.Resolution.PythonIdentifier} + +The only ways to create one are `.fromAst` (from a parsed AST node), +`.fromImport` (first component of a dotted module path), or `.builtin` +(for Python builtins like `len`). 
You cannot fabricate an identifier from +an arbitrary string — all identifiers trace back to source or builtins. + +Translation obtains Laurel identifiers by calling accessor functions on +these Python-level structures. The builtin mapping (`len` -> +`Any_len_to_Any`), method qualification (`get_x` -> `Account@get_x`), and +module qualification (`timedelta` -> `datetime_timedelta`) are all encoded +in those accessors. Translation never applies naming conventions itself. + +## Function Signatures + +When Resolution encounters a function definition or a call, it builds a +`FuncSig` that carries everything Translation will need: + +{docstring Strata.Python.Resolution.FuncSig} + +The parameter structure distinguishes instance methods (with an explicit +receiver) from static functions: + +{docstring Strata.Python.Resolution.FuncParams} + +The receiver is separated from the parameter list so that argument matching +can handle it correctly — the receiver gets its own slot in the zip-fold. +The parameters themselves are split by Python's parameter categories: + +{docstring Strata.Python.Resolution.ParamList} + +Defaults are resolved expressions (they carry `ResolvedAnn`). This is what +makes the types mutually recursive — `ParamList` stores resolved defaults, +which depend on `ResolvedAnn`, which depends on `NodeInfo`, which depends +on `FuncSig`, which depends on `ParamList`. + +## How Resolution Builds Context + +Resolution threads a `Ctx` (a `HashMap PythonIdentifier CtxEntry`) as its +fold accumulator. 
At the top level, each declaration extends it: + +- `def f(...)` extends with `.function sig` +- `class C` extends with `.class_ name fields methods` +- `import M` extends with `.module_ moduleCtx` (where moduleCtx is M's resolved Ctx) +- `x : T = ...` extends with `.variable ty` + +{docstring Strata.Python.Resolution.CtxEntry} + +Within a class body, the context is extended with `self` typed as the +enclosing class (enabling method resolution on `self`) and all methods +registered under their bare names (enabling `self.method()` lookup). + +Within a function body, the context is extended with parameters and locals. +Python's scoping rule — any assignment target anywhere in the body is +function-local — is computed upfront: + +{docstring Strata.Python.Resolution.computeLocals} + +FunctionDef and ClassDef are NOT included in locals. They are declarations, +not assignment targets. + +## Import Resolution + +Resolution is monadic (`ResolveM := ReaderT System.FilePath (StateT ResolveState (EIO String))`). +The reader carries `baseDir` — the root directory for finding module files. +The state collects resolved imported module programs for Translation and +memoizes already-resolved module paths. + +A module is a Ctx. `CtxEntry.module_` carries the module's resolved context: + +``` +| module_ (moduleCtx : Ctx) +``` + +### Demand-Driven Loading + +Modules are loaded on demand — only when a name from them is actually +referenced. This avoids eagerly loading an entire package (e.g. boto3's 421 +submodules) when only one service is used. + +The mechanism relies on **qualified type annotations** in generated stubs. +The boto3 `__init__` stub declares: + +```python +@overload +def client(service_name: Literal["s3"]) -> boto3.S3: ... +``` + +The return type `boto3.S3` is an attribute expression (`.Attribute (.Name "boto3") "S3"`), +not a string. It is structured data in the AST. + +Loading proceeds lazily: + +1. 
`import boto3` → load `boto3/__init__.python.st.ion` (slim: only `client()` overloads, + no `from boto3.X import X`). Insert `boto3 → .module_ ctx` with `client` in ctx. + +2. `x = boto3.client("s3")` → `resolveMethodCall` looks up `client` in boto3's ctx → + `.function sig`. The return type annotation is `boto3.S3` (an Attribute expr). + +3. `x.list_buckets(...)` → `typeOfExpr` on `x` yields the annotation `boto3.S3`. + `resolveMethodCall` needs the `S3` class. It walks the attribute chain: + look up `boto3` → `.module_ ctx` → look up `S3` in ctx → not found → + **load `boto3/S3.python.st.ion` on demand**, resolve it, insert `S3` into + boto3's module ctx → now resolve `list_buckets` from `S3`'s methods. + +The key insight: the attribute chain `boto3.S3` in the type annotation IS the +load path. No external dispatch table needed. The structured AST contains +the information needed to locate the module file. + +### What becomes monadic + +Both statement-level AND type-resolution functions operate in `ResolveM`: +- `resolveStmt`, `resolveBlock`, `resolveFuncDef`, `resolveMatchCase` — encounter imports +- `resolveMethodCall`, `typeOfExpr` — may trigger demand-driven loads when + traversing qualified type annotations through module contexts + +`resolveExpr` itself remains pure for most cases. Only the `.Call` case +(which dispatches to `resolveMethodCall`) touches the monad. + +### Module file lookup + +Given component name `n` and directory `dir`: +1. Try `dir / (n ++ ".python.st.ion")` +2. Try `dir / n / "__init__.python.st.ion"` (package) + +### Compiled Module Cache + +Imported modules are compiled to Laurel on demand and cached to disk +(analogous to CPython's `.pyc`). 
The pipeline translates each imported +module's resolved AST with caching: + +``` +for each imported module (sourcePath, resolvedAST): + cachePath := sourcePath with ".python.st.ion" → ".laurel.st" + if cachePath exists on disk: + load cached Laurel program + else: + translate resolvedAST → Laurel program + write Laurel program to cachePath + merge Laurel program into combined program +``` + +The cached Laurel contains only signatures (procedure declarations, type +definitions — no bodies to elaborate). Subsequent runs skip Translation +entirely for cached modules. + +### Stub generation convention + +Generated library stubs (e.g. boto3) use **qualified attribute references** +for return types, not imports: + +```python +# boto3/__init__.py — SLIM, no from-imports of submodules +@overload +def client(service_name: Literal["s3"]) -> boto3.S3: ... +@overload +def client(service_name: Literal["ec2"]) -> boto3.EC2: ... +``` + +Each service class lives in its own file (`boto3/S3.python.st.ion`). +Only the services actually used by the analyzed program get loaded. + +### Query-Based Module Resolution + +Imported modules are resolved lazily at the declaration level. Loading a +module does NOT resolve all its statements. Instead: + +1. **Index** — scan the module AST for top-level declarations (class names, + function names, method names within classes). This is a shallow structural + scan — no body resolution, no type resolution. Fast (O(n) in declaration + count, not statement count). + +2. **Store thunked entries** — the Ctx entry for an imported class stores + method names with `Thunk FuncSig` for each method's signature. The thunk + captures the raw AST of the method definition. + +3. **Force on demand** — when `resolveMethodCall` needs a specific method's + signature (e.g. `s3.list_buckets(...)`), it forces that method's thunk. + This runs `extractFuncSig` on just that one function definition. Other + methods in the class remain unresolved. 
+ +This means loading a 2841-line module (like S3) takes milliseconds (indexing +only). Each method call pays only for resolving one function's parameter list. + +The indexing scan is a simple structural match on the AST: +- `FunctionDef name ...` → record function name + raw AST +- `ClassDef name body ...` → record class name, scan body for method names + raw ASTs +- Everything else (TypedDicts, assignments, imports) → skip + +### Emitting Demanded Imported Declarations + +Imported modules are NOT translated whole. Resolution records exactly the +declarations a user program demands, and the pipeline translates only those. +Three kinds of demand are recorded in `ResolveState`: + +- **`demandedMethods`** — when `resolveMethodCall` resolves a class method + (e.g. `s3.delete_object`), it resolves that method's raw AST into a resolved + `FunctionDef` (`className = some S3`) and records it. The pipeline runs + `runTranslation` on these; each becomes an `S3@delete_object` procedure with + its leading-assert preconditions intact (stub asserts = specs). + +- **`demandedFunctions`** — when a call matches a module-level `.function` or + an `.overloadedFunction` overload (e.g. `boto3.client("s3")` → overload N), + the matched overload's raw AST is resolved and recorded. The pipeline + translates it into a `client$N` procedure whose return type is the service + class (`boto3.S3`). + +- **`demandedClasses`** — whenever a method or init of class `S3` is demanded, + `S3`'s name and field list (captured at index time in the `.class_` entry) + are recorded. The pipeline emits a `Composite` type definition for each, so + that `Composite "S3"` referenced by `client$N`'s return type is defined. + +The pipeline's Step 3: +1. Translate user code normally via `runTranslation`. +2. Translate `demandedMethods` and `demandedFunctions` (resolved ASTs) into + procedures. +3. Emit a `Composite` type for each `demandedClass` (fields → `pythonTypeToHighType`). +4. 
Imported procedures + types form the trusted runtime (not elaborated); + user code is elaborated normally. + +Only what the program touches is translated. The 345 TypedDicts and ~200 +uncalled methods of S3 never become Laurel. + +## Overload Resolution + +Python `@overload` functions define multiple signatures for the same name. +Resolution stores them as an ordered list of `FuncSig` under a single +`CtxEntry`: + +``` +| overloadedFunction (overloads : List FuncSig) +``` + +When Resolution encounters a call to an overloaded name, it walks the +overload list in declaration order and checks if the call site's arguments +match the parameter types of each overload. First match wins. + +Matching: for each parameter position, check if the argument's static type +(from `typeOfExpr` or literal type) is compatible with the parameter's +declared type. A `Literal["s3"]` parameter matches a string literal `"s3"`. +A `str` parameter matches any string-typed expression. `Any` matches +everything. + +The resolved call references a specific overload. Translation emits each +overload as a distinctly-named procedure: + +``` +client → client$0, client$1, ..., client$N +``` + +Only the overloads actually referenced by resolved calls are emitted (the +rest are dead code — never translated). The call site's annotation carries +the specific overload's sig, so Translation knows which disambiguated name +to call. + +Resolution builds the overload list from consecutive `@overload`-decorated +function definitions with the same name. The `@overload` decorator is +recognized by checking the `decorators` field for a `.Name "overload"` node. + +## Method Resolution + +When Resolution encounters `receiver.method()`, it needs to determine the +receiver's class to find the method signature. 
It does this by chasing +_spines_ — `.Name` and `.Attribute` chains: + +{docstring Strata.Python.Resolution.typeOfExpr} + +- `.Name n` looks up `ctx[n]` to get the variable's type annotation +- `.Attribute obj field` recursively gets the type of `obj`, finds that + class in ctx, and looks up `field` in its field list + +For any non-spine receiver (`.Call`, `.Subscript`, `.IfExp`), Resolution +emits `.unresolved`. This is tech debt — those forms could be resolved by +interpreting return types, but are not yet implemented. + +## Attribute Resolution + +An `.Attribute` whose object is a VALUE (a bound variable / instance) gets +`.attribute name`, where `name` is the bare Python field name. Resolution does +NOT resolve which class the field belongs to — that requires knowing the +receiver's type at use-site, which is Elaboration's job. Elaboration synthesizes +the receiver type and branches: + +- Composite receiver: look up the field in the class, emit `readField` +- Any receiver: produce Any (field access on Any is unknowable) + +An attribute access whose object is NOT a value has no receiver type, so it is +not a field access. If the object resolved to `.irrelevant` (a module, e.g. `sys` +in `sys.argv`) or `.unresolved`, the whole `.Attribute` resolves to `.unresolved` +(→ hole in Translation). `sys.argv` is a module member, not a field of a value; +emitting `FieldSelect` on a non-value would hand the elaborator a `FieldSelect` +whose object is a hole, which it cannot type — a saturation violation. + +When the Attribute is the callee of a Call (`obj.method()`), the Call +node's annotation carries `.funcCall sig` with the resolved method — the +Attribute's own annotation is irrelevant. 
+ +## The Entry Point + +{docstring Strata.Python.Resolution.resolve} + +The initial context is seeded with Python builtins — each with a correct +`FuncSig` (proper arity, param names, return type): + +{docstring Strata.Python.Resolution.builtinContext} + +# The Bridge: Accessor Functions +%%% +tag := "accessors" +%%% + +Between Resolution and Translation sits a set of accessor functions. These +are the ONLY mechanism by which Translation obtains `Laurel.Identifier` +values. They encode all naming conventions in one place. + +{docstring Strata.Python.Resolution.PythonIdentifier.toLaurel} + +{docstring Strata.Python.Resolution.FuncSig.laurelName} + +{docstring Strata.Python.Resolution.FuncSig.laurelDeclInputs} + +{docstring Strata.Python.Resolution.FuncSig.matchArgs} + +{docstring Strata.Python.Resolution.FuncSig.laurelLocals} + +{docstring Strata.Python.Resolution.FuncSig.laurelReceiver} + +`matchArgs` deserves emphasis: it is a zip-fold over parameter slots. +Each slot is filled in order — positional arg first, then kwarg by name, +then resolved default. It includes the receiver slot for instance methods. +It lives in Resolution (not Translation) because it accesses the private +`ParamList` fields and the resolved default expressions. + + +# Translation +%%% +tag := "translation" +%%% + +Given an already-disambiguated AST, Translation emits Laurel by structural +recursion. It pattern matches on `NodeInfo` and calls the accessor +functions above. It never resolves names, never applies naming conventions, +never fabricates identifiers. + +## The Writer Monad + +Translation needs to emit statements. Most expression translations produce +a single Laurel expression. But some — like class instantiation in +expression position — need to emit prefix statements (`tmp := New cls; +initCall`) and then return a reference (`tmp`). A writer monad handles +this cleanly: + +{docstring Strata.Python.Translation.TransM} + +`tell` emits statements. `collect` (= `lift . 
runWriterT`) captures them +at block boundaries. `translateExpr` returns `TransM StmtExprMd` — it may +`tell` prefix statements and return an expression value. + +The state carries a fresh name counter and a stack of loop labels (for +break/continue → `Exit` translation): + +{docstring Strata.Python.Translation.TransState} + +{docstring Strata.Python.Translation.TransError} + +## How Translation Uses NodeInfo + +_Reference nodes_ (Name, Call, BinOp, Attribute): Translation pattern +matches on `ann.info` and transcribes: + +- `.variable name` -> `Identifier name.toLaurel` +- `.funcCall sig` -> `StaticCall sig.laurelName (matchArgs ...)` +- `.classNew cls initSig` -> `tell [New, initCall]; return tmpRef` +- `.attribute name` -> `FieldSelect obj name.toLaurel` +- `.unresolved` -> `Hole` + +For operators (BinOp, UnaryOp, Compare, BoolOp), Translation reads +`.funcCall sig` from the annotation. The sig has correct arity (2 for +binary, 1 for unary) and the correct runtime procedure name. Translation +uses `matchArgs` uniformly — no hardcoded argument lists. + +_Structural nodes_ (literals, control flow): Translation emits the +corresponding Laurel construct directly — `LiteralInt`, `Block`, `While`, +`IfThenElse`, `Assign`, `Exit`, `Assert`, `Assume`, `LocalVariable`. + +_Declaration nodes_ (FunctionDef, ClassDef): Translation reads +`.funcDecl sig` / `.classDecl name fields methods` and emits +`Procedure` / `CompositeType`. + +## Params as Mutable Locals + +Python parameters are mutable — you can reassign `x` inside a function. +Laurel inputs are immutable. 
Translation bridges this: + +- Procedure inputs are named `$in_X` +- The body declares `LocalVariable X := $in_X` for each param +- The body uses the mutable `X` + +## Type Mapping + +{docstring Strata.Python.Translation.pythonTypeToHighType} + +## The Entry Point + +{docstring Strata.Python.Translation.runTranslation} + +# Coverage +%%% +tag := "coverage" +%%% + +## Precisely Translated + +- Literals (int, bool, str, None) +- Variables (identifiers, scope hoisting) +- Binary/comparison/boolean/unary operators (-> prelude StaticCalls) +- Function definitions (params, defaults, kwargs, return) +- Class definitions (fields, methods with self) +- Assignments (simple, augmented, annotated, tuple unpacking) +- Control flow (if/elif/else, while, for, break, continue) +- Return, assert, assume +- Try/except (labeled blocks + isError guards) +- Context managers (with/as -> resolved enter/exit calls) +- List/dict/tuple literals (-> `ListAny_cons`/`DictStrAny_cons` encoding) +- F-strings (-> `to_string_any`) +- Subscript read/write (-> `Any_get`/`Any_sets`). A subscript target is not an + lvalue identifier, so a subscript assignment — including augmented + (`a[i] op= v`) — writes back through `Any_sets`, never `Assign [Any_get ...]`. +- Slice notation (-> `from_Slice`) +- Module imports (-> qualified name resolution) +- Class instantiation (-> New + init call) +- Method calls (-> qualified StaticCall with self) + +## Approximated (Hole) + +Sound but imprecise — the translation produces a nondeterministic Hole +that can take any value, so verification remains sound but cannot prove +properties that depend on the precise semantics. + +- Unresolved names (not in context) +- Function/overloaded/class names used as values (no first-class function/class + values in Laurel — e.g. 
the type argument `str` in `isinstance(x, str)`) +- Unmodeled standard-library and third-party names — no spec exists, so each + resolves to a sound hole, never an internal error: `defaultdict` (collections), + `DictWriter` (csv), `ArgumentParser`/`Namespace` (argparse), `Logger`/ + `getLogger` (logging), `bytes`, `sys.argv`, and boto3 service classes the stubs + do not cover (e.g. `KMS`). Modeling any of these is future work, not a bug. +- Lambda expressions +- List/set/dict comprehensions +- Generator expressions +- Walrus operator +- Match statements +- Async constructs +- Decorators +- Star expressions +- Float literals (no real arithmetic) + +## Unsupported (Translation throws) + +- Chained comparisons (`a < b < c`) +- Multiple assignment targets (`x = y = 5`) + + +# Elaboration +%%% +tag := "elaboration" +%%% + +## What Walks In, What Walks Out + +Input: a `Laurel.Program`. Output: a `Laurel.Program` with explicit effect +parameters determined by each procedure's grade. + +Formally, elaboration translates Laurel derivations into GFGL (Graded +Fine-Grain Laurel) derivations, then projects GFGL back to Laurel. We +present the Laurel type system (source), then GFGL (target), then the +translation. + +## Laurel: The Source Type System + +Laurel is impure CBV. One judgment form. The context Γ carries variable +bindings (x : A) and label names (l). 
+ +$$`\Gamma \vdash e : A` + +$$`\frac{}{\Gamma \vdash n : \mathsf{int}} \qquad \frac{}{\Gamma \vdash b : \mathsf{bool}} \qquad \frac{}{\Gamma \vdash s : \mathsf{string}}` + +$$`\frac{(x : A) \in \Gamma}{\Gamma \vdash x : A}` + +$$`\frac{f : (A_1, \ldots, A_n) \to B \in \Gamma \quad \Gamma \vdash e_i : A_i}{\Gamma \vdash f(e_1, \ldots, e_n) : B}` + +$$`\frac{\Gamma \vdash e : C \quad \text{fieldType}(C, f) = T}{\Gamma \vdash e.f : T}` + +$$`\frac{\Gamma \vdash e : \Gamma(x) \quad \Gamma \vdash \text{rest} : A}{\Gamma \vdash (x := e);\ \text{rest} : A}` + +$$`\frac{\Gamma \vdash e : T \quad \Gamma, x{:}T \vdash \text{rest} : A}{\Gamma \vdash (\mathbf{var}\ x{:}T := e);\ \text{rest} : A}` + +$$`\frac{\Gamma \vdash c : \mathsf{bool} \quad \Gamma \vdash t : A \quad \Gamma \vdash f : A \quad \Gamma \vdash \text{rest} : A}{\Gamma \vdash (\mathbf{if}\ c\ \mathbf{then}\ t\ \mathbf{else}\ f);\ \text{rest} : A}` + +$$`\frac{\Gamma \vdash c : \mathsf{bool} \quad \Gamma \vdash \text{body} : A \quad \Gamma \vdash \text{rest} : A}{\Gamma \vdash (\mathbf{while}\ c\ \mathbf{do}\ \text{body});\ \text{rest} : A}` + +$$`\frac{\Gamma, l \vdash \text{body} : A \quad \Gamma \vdash \text{rest} : A}{\Gamma \vdash \{\text{body}\}_l;\ \text{rest} : A}` + +$$`\frac{l \in \Gamma}{\Gamma \vdash \mathbf{exit}\ l : A}` + +$$`\frac{\Gamma \vdash c : \mathsf{bool} \quad \Gamma \vdash \text{rest} : A}{\Gamma \vdash (\mathbf{assert}\ c);\ \text{rest} : A}` + +$$`\frac{\Gamma \vdash \text{obj} : C \quad \Gamma \vdash v : \text{fieldType}(C, f) \quad \Gamma \vdash \text{rest} : A}{\Gamma \vdash (\text{obj}.f := v);\ \text{rest} : A}` + +$$`\frac{}{\Gamma \vdash \mathbf{skip} : \mathsf{TVoid}}` + +## GFGL: The Type System + +GFGL has two sorts — values (pure, duplicable) and producers (effectful, +carry a continuation). 
Typing is bidirectional with four judgment forms: + +$$`\Gamma \vdash V \Rightarrow A \qquad \Gamma \vdash V \Leftarrow A \qquad \Gamma \vdash M \Rightarrow A\ \&\ d \qquad \Gamma \vdash M \Leftarrow A\ \&\ e` + +### Types + +{docstring Strata.FineGrainLaurel.LowType} + +### Grades + +{docstring Strata.FineGrainLaurel.Grade} + +{docstring Strata.FineGrainLaurel.Grade.leftResidual} + +### Terms + +{docstring Strata.FineGrainLaurel.FGLValue} + +{docstring Strata.FineGrainLaurel.FGLProducer} + +### Subtyping: A ≤ B ↦ c + +`subtype` is a total case analysis of the coercion relation over `LowType`. Every +ordered pair `(A, B)` is decided: `.refl` when `A = B`, `.coerce w` when Python +performs an implicit conversion `A → B` (witnessed by one direct runtime +function), and `.unrelated` otherwise. `.unrelated` is a deliberate verdict per +pair, not a fall-through for forgotten cases. + +`LowType.TCore` carries an open name string, so the relation cannot match one arm +per name. It decides the finite set of core types that `eraseType` produces +(`Any`, `Composite`, `ListAny`, `DictStrAny`, …); any unrecognized `TCore` name +is `.unrelated`, the sound default for a type the relation knows nothing about. + +The coercion families, all witnessed by functions in the runtime: + +- **box** (`T ≤ Any`): the value constructors `from_int`, `from_str`, `from_bool`, + `from_float`, `from_Composite`, `from_ListAny`, `from_DictStrAny`, `from_None`. +- **unbox** (`Any ≤ T`): the projections `Any_to_bool`, `Any..as_int!`, + `Any..as_string!`, `Any..as_float!`, `Any..as_Composite!`, `Any..as_Dict!`, + `Any..as_ListAny!`. +- **truthiness** (`T ≤ bool`): Python's `bool(x)` per type — `int_to_bool`, + `str_to_bool`, `float_to_bool`, `list_to_bool`, `dict_to_bool`, `None ↦ false`, + `Composite ↦ true` (objects are truthy by default). +- **numeric** (`bool ≤ int ≤ float`): `bool_to_int`, `int_to_real`, `bool_to_real` + — Python's numeric tower for arithmetic. 
+ +`subtype` returns one witness; the elaborator applies it once at each typing +boundary (only from `checkValue`) and never chains two `subtype` results, so each +pair needs only its single direct witness. + +{docstring Strata.FineGrainLaurel.subtype} + +### Subgrading: d ≤ e ↦ (pre, outs) + +{docstring Strata.FineGrainLaurel.mkGradedCall} + +### Runtime Interface (Heap Model) + +{docstring Strata.Laurel.heapConstants} + +### Value Synthesis: Γ ⊢ V ⇒ A + +$$`\frac{}{\Gamma \vdash \mathsf{litInt}\ n \Rightarrow \mathsf{TInt}} \qquad \frac{}{\Gamma \vdash \mathsf{litBool}\ b \Rightarrow \mathsf{TBool}} \qquad \frac{}{\Gamma \vdash \mathsf{litString}\ s \Rightarrow \mathsf{TString}}` + +$$`\frac{(x : A) \in \Gamma}{\Gamma \vdash \mathsf{var}\ x \Rightarrow A}` + +$$`\frac{f : (A_1, \ldots, A_n) \to B\ \&\ \mathsf{pure} \quad \Gamma \vdash V_i \Leftarrow A_i}{\Gamma \vdash \mathsf{functionCall}\ f\ [V_1, \ldots, V_n] \Rightarrow B}` + +### Value Checking: Γ ⊢ V ⇐ A + +$$`\frac{\Gamma \vdash V \Rightarrow B \quad B \leq A \mapsto c}{\Gamma \vdash c(V) \Leftarrow A}` + +### Producer Synthesis: Γ ⊢ M ⇒ A & d + +Exactly one rule: + +$$`\frac{f : (A_1, \ldots, A_n) \to B\ \&\ d \quad \Gamma \vdash V_i \Leftarrow A_i}{\Gamma \vdash \mathsf{procedureCall}\ f\ [V_1, \ldots, V_n] \Rightarrow B\ \&\ d}` + +### Producer Checking: Γ ⊢ M ⇐ A & e + +$$`\frac{\Gamma \vdash \mathsf{procedureCall}\ f\ [V_i] \Rightarrow B\ \&\ d \quad d \leq e \mapsto (\text{pre}, \text{outs}) \quad \Gamma, \text{outs} \vdash K \Leftarrow A\ \&\ (d \backslash e)}{\Gamma \vdash \mathsf{procedureCall}\ f\ (\text{pre} \mathbin{++} [V_i])\ \text{outs}\ K \Leftarrow A\ \&\ e}` + + +$$`\frac{\Gamma \vdash V \Leftarrow \mathsf{bool} \quad \Gamma \vdash M_t \Leftarrow A\ \&\ e \quad \Gamma \vdash M_f \Leftarrow A\ \&\ e \quad \Gamma \vdash K \Leftarrow A\ \&\ e}{\Gamma \vdash \mathsf{ifThenElse}\ V\ M_t\ M_f\ K \Leftarrow A\ \&\ e}` + +$$`\frac{\Gamma \vdash V \Leftarrow \mathsf{bool} \quad \Gamma \vdash M_b 
\Leftarrow A\ \&\ e \quad \Gamma \vdash K \Leftarrow A\ \&\ e}{\Gamma \vdash \mathsf{whileLoop}\ V\ M_b\ K \Leftarrow A\ \&\ e}` + +$$`\frac{\Gamma \vdash V \Leftarrow A}{\Gamma \vdash \mathsf{produce}\ V \Leftarrow A\ \&\ e} \qquad \frac{l \in \Gamma}{\Gamma \vdash \mathsf{exit}\ l \Leftarrow A\ \&\ e}` + +$$`\frac{\Gamma \vdash M \Leftarrow \Gamma(x)\ \&\ e \quad \Gamma \vdash K \Leftarrow A\ \&\ e}{\Gamma \vdash \mathsf{assign}\ x\ M\ K \Leftarrow A\ \&\ e}` + +$$`\frac{\Gamma \vdash M \Leftarrow T\ \&\ e \quad \Gamma, x{:}T \vdash K \Leftarrow A\ \&\ e}{\Gamma \vdash \mathsf{varDecl}\ x\ T\ M\ K \Leftarrow A\ \&\ e}` + +$$`\frac{\Gamma \vdash V \Leftarrow \mathsf{bool} \quad \Gamma \vdash K \Leftarrow A\ \&\ e}{\Gamma \vdash \mathsf{assert}\ V\ K \Leftarrow A\ \&\ e}` + +$$`\frac{\Gamma, l \vdash M_b \Leftarrow A\ \&\ e \quad \Gamma \vdash K \Leftarrow A\ \&\ e}{\Gamma \vdash \mathsf{labeledBlock}\ l\ M_b\ K \Leftarrow A\ \&\ e}` + +## The Translation ⟦·⟧ : Laurel → GFGL + +The translation is a transformation of Laurel typing derivations +(`Γ ⊢ e : A`) into GFGL producer checking derivations +(`⟦Γ⟧ ⊢ M ⇐ ⟦A⟧ & d`). Every Laurel derivation maps to a producer — +even literals and variables (they become `produce V`). This is the +CBV-to-FGCBV embedding. + +Three functions: + +``` +⟦·⟧⇐ₚ : (Γ : LaurelCtx) → (s : StmtExpr) → (k : List StmtExpr) + → (A : HighType) → (d : Grade) + → (Γ ⊢ s;k : A) + → ∃(M : FGLProducer). (⟦Γ⟧ ⊢ M ⇐ ⟦A⟧ & d) + +⟦·⟧⇒ᵥ : (Γ : LaurelCtx) → (e : StmtExpr) + → ∃(A : HighType). (Γ ⊢ e : A) + → ∃(V : FGLValue). (⟦Γ⟧ ⊢ V ⇒ ⟦A⟧) + +⟦·⟧⇐ᵥ : (Γ : LaurelCtx) → (e : StmtExpr) → (A : HighType) + → (Γ ⊢ e : A) + → ∃(V : FGLValue). (⟦Γ⟧ ⊢ V ⇐ ⟦A⟧) +``` + +`⟦·⟧⇐ₚ` (`checkProducer`) is the entry point. `⟦·⟧⇒ᵥ` (`synthValue`) +and `⟦·⟧⇐ᵥ` (`checkValue`) build value sub-terms inside producer forms. +Producer synthesis (⟦·⟧⇒ₚ) is handled by inversion within +`checkProducerStaticCall` — the single synthesis rule is always a call. 
+ +### Setup: Environment and Grades + +Before translating, we build Γ from the program declarations and +infer grades for each procedure. + +{docstring Strata.FineGrainLaurel.buildElabEnvFromProgram} + +{docstring Strata.FineGrainLaurel.ElabTypeEnv} + +{docstring Strata.FineGrainLaurel.ElabEnv} + +{docstring Strata.FineGrainLaurel.ElabState} + +{docstring Strata.FineGrainLaurel.fullElaborate} + +`fullElaborate` runs two passes: + +1. _Grade inference_ (pass 1): For each user procedure, try elaborating its + body at grade `pure`, then `proc`, then `err`, then `heap`, then `heapErr`. + The first grade where elaboration succeeds (returns `some`) is that + procedure's grade. Iterate to fixpoint — when a callee's grade changes, + re-elaborate its callers. Convergence is guaranteed by the finite lattice. + +2. _Term production_ (pass 2): With grades fixed, elaborate each procedure's + body at its inferred grade. Pass 1 guarantees this succeeds. Project the + resulting GFGL term back to Laurel. + +Runtime procedure grades are not inferred — they're read from the signature +by `gradeFromSignature` (does it have a Heap input? An Error output?). + +_Fail-fast contract._ Elaboration receives well-scoped Laurel by construction +(Resolution saturates; Translation holes whatever it cannot bind), so every +procedure is expected to elaborate. If one nonetheless cannot — the elaborator +genuinely cannot produce Laurel that Core can consume — that is a hard error, not +a recoverable condition. The pipeline collects the names of all such procedures +and fails the whole run with a structured error listing them; it never emits a +procedure unchanged and never lets un-elaborated Laurel (with holes) reach Core. +A failure here means an upstream saturation gap to fix, located to a named +procedure — not a silent downstream "holes should have been eliminated" in Core. 
+ +### Procedure bodies are commands (checked at `TVoid`) + +Both passes elaborate the body at expected type `.TVoid`, not the procedure's +return type. A translated procedure body is a statement command, not a value: +Python statements do not return their last expression, and `return e` was already +lowered by Translation to `LaurelResult := e; exit`. So the value, when there is +one, flows through that explicit assignment — which `checkAssign` types against +`LaurelResult`'s own declared type, independent of the ambient expected type. + +Checking the body at the return type instead would conflate the two. A loop body +or branch arm whose last statement is a void call (`print(...)`) would have that +call's `()` result coerced toward the declared return type and projected as a +spurious `LaurelResult := from_None()` — ill-typed when the return type is a +scalar (`Impossible to unify Any with string`). At `.TVoid` no such coercion +arises, and the void tail projects to nothing (see Projection's optional +destination). The return value reaches `LaurelResult` only through the `return` +assignment. + +### Preconditions + +A `requires` clause is a pure value of type `bool` — no effects, no sequencing, +no continuation — so pass 2 elaborates it with the value judgment `checkValue` +(expected type `.TBool`), not the producer judgment that elaborates bodies. +`checkValue` synthesizes the term and applies the subtyping coercions — +`from_int`/`from_str` on the argument literals (the runtime operators take `Any` +parameters) and `Any_to_bool` on the `Any`-typed result — and `projectValue` +yields the single Core expression that replaces the clause. Holes it uses are +collected into the program's hole procedures alongside the body's. + +A precondition may contain a hole — e.g. a stub assert +`re.compile(...).search(kwargs["RoleName"]) is not None`, where `re` is unmodeled +so the subterm is a hole. 
In a body such a hole is nondeterministic havoc, but in +a pure value position nondeterminism has no meaning: the value must be a +deterministic function of what is in scope. So `checkValue`'s `.Hole` case +elaborates *any* hole as the deterministic `hole_N(inputs)` (an uninterpreted +pure function of the procedure's inputs), regardless of how Translation marked it. +The contract stays well-typed and the resulting caller obligation is sound but +uninterpretable — verification is inconclusive, never unsound, and no conjunct is +dropped. + +Translation emits preconditions in surface form, e.g. +`PGe(Any_len_to_Any(Any_get($in_kwargs, "Key")), 1)` — bare `intConst 1` and +`strConst "Key"`, and an `Any`-typed `PGe(...)` standing in a `bool` position. +Without this step those terms reach Core uncoerced; the Core type checker reports +`Impossible to unify Any with (arrow Any (arrow Any Any))` at the clause's source +range. + +{docstring Strata.FineGrainLaurel.gradeFromSignature} + +### Type Erasure: ⟦·⟧ on types + +{docstring Strata.FineGrainLaurel.eraseType} + +### `checkProducer` — the entry point (⟦·⟧⇐ₚ) + +Each case in the pattern match translates a Laurel statement into the +corresponding GFGL producer checking derivation. The `k` parameter +is the continuation — `checkProducers(k, A, d)` translates it. 
+ +- `.IfThenElse` → `checkProducerIf` +- `.While` → `checkProducerWhile` +- `.Exit` → exit rule (inline) +- `.LocalVariable` → `checkProducerVarDecl` +- `.Assert` / `.Assume` → `checkProducerAssert` / `checkProducerAssume` +- `.Block` → `checkProducerBlock` +- `.Assign` → `checkAssign` (dispatches on LHS/RHS) +- `.StaticCall` → `checkProducerStaticCall` (bare call, discards return value) +- `.New` → failure (bare `new` in statement position is pathological) +- `.Hole` → inline (deterministic or nondeterministic) +- `.Return` / `.InstanceCall` → failure (not yet supported) +- All other `StmtExpr` constructors → failure (bare value expressions are ill-typed in Laurel) + +{docstring Strata.FineGrainLaurel.checkProducer} + +The clause helpers, each implementing one translation rule: + +{docstring Strata.FineGrainLaurel.checkProducerIf} + +{docstring Strata.FineGrainLaurel.checkProducerWhile} + +{docstring Strata.FineGrainLaurel.checkProducerVarDecl} + +{docstring Strata.FineGrainLaurel.checkProducerAssert} + +{docstring Strata.FineGrainLaurel.checkProducerAssume} + +{docstring Strata.FineGrainLaurel.checkProducerStaticCall} + +{docstring Strata.FineGrainLaurel.checkProducerBlock} + +### `checkAssign` — assignment elaboration + +Dispatches on LHS to get the assignee, then on RHS: + +- `.FieldSelect` LHS → `checkAssignFieldWrite` (heap write) +- `.Identifier` LHS, `.StaticCall` RHS → `checkAssignStaticCall` +- `.Identifier` LHS, `.New` RHS → `checkAssignNew` +- `.Identifier` LHS, other RHS → `checkAssignVar` + +`StaticCall` and `New` RHS need the assignee inside the effect scope. + +{docstring Strata.FineGrainLaurel.checkAssign} + +{docstring Strata.FineGrainLaurel.checkAssignVar} + +{docstring Strata.FineGrainLaurel.checkAssignStaticCall} + +{docstring Strata.FineGrainLaurel.checkAssignNew} + +{docstring Strata.FineGrainLaurel.checkAssignFieldWrite} + +### `checkValue` — internal helper (⟦·⟧⇐ᵥ) + +Calls `synthValue`, then applies the coercion from `subtype`. 
+ +{docstring Strata.FineGrainLaurel.checkValue} + +### `synthValue` — internal helper (⟦·⟧⇒ᵥ) + +Called by `checkValue`. Discovers the value and its type. Operates on +expressions already in value form (bound variables, literals, pure calls). + +{docstring Strata.FineGrainLaurel.synthValue} + +{docstring Strata.FineGrainLaurel.synthValueLiteral} + +{docstring Strata.FineGrainLaurel.synthValueVar} + +{docstring Strata.FineGrainLaurel.synthValueFieldSelect} + +{docstring Strata.FineGrainLaurel.synthValueStaticCall} + +## Projection: GFGL → Laurel (Destination Passing Style) + +Elaboration maps Laurel derivations (`Γ ⊢ e : A`) to GFGL derivations +(`⟦Γ⟧ ⊢ M ⇐ ⟦A⟧ & d`). Projection reverses this: + +``` +⟦D⟧ₓ⁻¹ : (⟦Γ⟧ ⊢ M ⇐ ⟦A⟧ & d) → ∃e⃗. (Γ, x : A ⊢ e⃗ : TVoid) +``` + +Given a GFGL checking derivation `D` and a destination variable `x : A`, +projection produces a Laurel statement list `e⃗` that assigns to `x`. +One GFGL rule maps to one or more Laurel typing rules in the output. + +The destination is **optional**: `x : A` may be omitted. A producer whose value +has nowhere to go (a `TVoid` command — see "Procedure bodies are commands" +above) projects with no destination, and its tail `produce` emits no assignment +at all rather than `x := v`. This is the only correct reading when there is no +`x : A` in the context: there is nothing to assign to. + +``` +proj : Option StmtExprMd → FGLProducer → List StmtExprMd +``` + +The destination threads down unchanged through control flow (`if`/`while`/ +labeled block) and through a procedure call's continuation; an assignment's RHS +subproducer is projected with `some target`, so `x := f()` still writes `x` even +inside a void body. `projProduce none` yields `[]`; `projProduce (some d)` yields +`d := v`. + +The top-level body is projected with no destination (`none`). 
A `return e` was +already lowered by Translation to `LaurelResult := e; exit`, so the returned +value reaches `LaurelResult` through that explicit assignment, not through the +body's tail. + +Each helper carries its derivation tree showing the GFGL rule on top +and the Laurel rules on bottom: + +{docstring Strata.FineGrainLaurel.proj} + +{docstring Strata.FineGrainLaurel.projProduce} + +{docstring Strata.FineGrainLaurel.projVarDecl} + +{docstring Strata.FineGrainLaurel.projAssign} + +{docstring Strata.FineGrainLaurel.projIfThenElse} + +{docstring Strata.FineGrainLaurel.projWhileLoop} + +{docstring Strata.FineGrainLaurel.projProcedureCall} + +{docstring Strata.FineGrainLaurel.projAssert} + +{docstring Strata.FineGrainLaurel.projAssume} + +{docstring Strata.FineGrainLaurel.projLabeledBlock} + +{docstring Strata.FineGrainLaurel.projExit} + +{docstring Strata.FineGrainLaurel.projSkip} + +# Tech Debt +%%% +tag := "tech_debt" +%%% + +- _Instance procedures:_ Methods are emitted as top-level statics with + `self` as first param. The `instanceProcedures` field on CompositeType + is empty. +- _Spine resolution incomplete:_ Non-spine receivers emit `.unresolved`. +- _Match case pattern bindings:_ Not extracted as locals (requires walking + `Python.pattern`). +- _Loop labels:_ Push/pop on mutable state. Should be reader monad. +- _Multi-output forces err grade:_ Translation declares `maybe_except` + on every procedure, causing grade inference to always join with err. +- _Old resolver in Laurel→Core:_ `translateMinimal` still calls the old Laurel + `resolve` and `inferHoleTypes` rather than the new Laurel resolver. Both are + load-bearing as wired: `resolve` builds the `SemanticModel` Core translation + reads, and removing `inferHoleTypes` produces ill-typed Core across the suite + (it annotates expression types Core translation depends on, despite its name). + They must be ported to the new resolver, not deleted piecemeal. 
diff --git a/docs/verso/PythonDocMain.lean b/docs/verso/PythonDocMain.lean new file mode 100644 index 0000000000..e99996bc4e --- /dev/null +++ b/docs/verso/PythonDocMain.lean @@ -0,0 +1,16 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ + +import PythonDoc +open Verso.Genre.Manual (RenderConfig manualMain) + +def config : RenderConfig where + emitTeX := false + emitHtmlSingle := .immediately + emitHtmlMulti := .no + htmlDepth := 2 + +def main := manualMain (%doc PythonDoc) (config := config) diff --git a/docs/verso/generate.sh b/docs/verso/generate.sh index 2ca212838e..ca216fd244 100755 --- a/docs/verso/generate.sh +++ b/docs/verso/generate.sh @@ -17,5 +17,6 @@ cd "${curpwd}" lake exe ddm --with-html-single --output _out/ddm lake exe langdef --with-html-single --output _out/langdef lake exe laurel --with-html-single --output _out/laurel +lake exe python --with-html-single --output _out/python cp strata-hourglass.png _out/langdef/html-single/ cp -r ../api/.lake/build/doc _out/api diff --git a/docs/verso/index.html b/docs/verso/index.html index d10080b52a..71b97d96a9 100644 --- a/docs/verso/index.html +++ b/docs/verso/index.html @@ -36,6 +36,10 @@

Strata Core Language Definition Documentation

Laurel Language Documentation

Documentation for the Laurel intermediate verification language. Laurel attempts to provide features that are common to Java, Python, and JavaScript.

+ +

Python Pipeline Documentation

+

Documentation for the Python-to-Laurel translation pipeline: Resolution, Translation, and Elaboration.

+

API Reference

API documentation for Strata and StrataTest.

diff --git a/docs/verso/lakefile.toml b/docs/verso/lakefile.toml index f012b68f95..2cd876647b 100644 --- a/docs/verso/lakefile.toml +++ b/docs/verso/lakefile.toml @@ -1,5 +1,5 @@ name = "StrataDoc" -defaultTargets = ["ddm", "langdef", "laurel"] +defaultTargets = ["ddm", "langdef", "laurel", "python"] [[require]] name = "Strata" @@ -30,3 +30,10 @@ name = "LaurelDoc" [[lean_exe]] name = "laurel" root = "LaurelDocMain" + +[[lean_lib]] +name = "PythonDoc" + +[[lean_exe]] +name = "python" +root = "PythonDocMain"