Skip to content

Commit 32d91ec

Browse files
committed
kernel: environment-machine WHNF (Phase A — lazy substitution on the beta path)
Port of the IxVM environment machine (~/ix-aiur d0d3375; design in docs/env_machine_whnf.md). whnf_core's App arm enters a Krivine-style machine when a beta fires: beta/zeta become O(1) environment pushes onto a persistent cons-list of closures, and substitution materializes only at machine exits (clo_subst readback) — work proportional to what the reduction consumes, not to every beta performed. A beta chain ending in another beta never materializes its intermediate bodies. - subst.rs: Clo/MEnv closure types (carried env length; per-Clo OnceCell readback memo standing in for Aiur's global memoization), clo_subst/clo_readback with lbr guards, per-call (uid, depth) memo from a new InternTable scratch pool, results interned like subst. - whnf.rs: iterative machine_whnf loop + machine_exit, replacing the multi-lambda peel + simul_subst block. Fuel charged on beta/zeta at the eager path's one-substitution-event granularity. Every exit reads back and re-enters the outer loop, so ambient let/LDecl zeta, iota, prim dispatch, and cheap_proj/cheap_rec def-eq discipline are unchanged; all caches and keys untouched. - bench-cycles.sh: add natreclinear/natfoldsucc (foldAdd-class sentinels for the known curried-sharing regression class). 609 kernel + 200 ixon tests pass; native check_one true on Nat.add_comm, Nat.gcd_comm, Int16.instRxcHasSize_eq, natRecLinearCheck, Nat.fold_succ; perf_check whole-env failure parity with HEAD on 11 envs. Guest cycle benchmarks pending (box busy).
1 parent 6786753 commit 32d91ec

4 files changed

Lines changed: 467 additions & 27 deletions

File tree

crates/kernel/src/env.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,14 @@ pub struct InternTable<M: KernelMode> {
111111
/// because `lift` is invoked from inside `subst_cached`, and the two
112112
/// caches have different semantics, so they must not share entries.
113113
pub(crate) lift_scratch: FxHashMap<(Addr, u64), KExpr<M>>,
114+
/// Pool of scratch maps for `clo_subst` per-call memoization, keyed by
115+
/// `(addr, depth)`. A pool rather than a single buffer because
116+
/// `clo_subst` re-enters itself through `clo_readback` of environment
117+
/// entries (each readback substitutes under a *different* environment,
118+
/// so the memo must not be shared between the nesting levels). Maps are
119+
/// cleared and returned to the pool on exit, so allocations persist
120+
/// across calls like the other scratches.
121+
pub(crate) clo_scratch_pool: Vec<FxHashMap<(Addr, u64), KExpr<M>>>,
114122
}
115123

116124
impl<M: KernelMode> Default for InternTable<M> {
@@ -165,6 +173,7 @@ impl<M: KernelMode> InternTable<M> {
165173
canon_univs: FxHashSet::default(),
166174
subst_scratch: FxHashMap::default(),
167175
lift_scratch: FxHashMap::default(),
176+
clo_scratch_pool: Vec::new(),
168177
}
169178
}
170179

@@ -726,6 +735,7 @@ impl<M: KernelMode> KEnv<M> {
726735
// intern.exprs cleanup above can actually drop ExprData allocations.
727736
self.intern.subst_scratch.clear();
728737
self.intern.lift_scratch.clear();
738+
self.intern.clo_scratch_pool.clear();
729739
let _ = self.prims.take();
730740
self.whnf_cache.clear();
731741
self.whnf_no_delta_cache.clear();

crates/kernel/src/subst.rs

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
//! uses a `PtrMap Expr Expr` for the same reason (see
1212
//! `refs/lean4lean/Lean4Lean/Expr.lean:14`).
1313
14+
use std::cell::OnceCell;
15+
use std::sync::Arc;
16+
1417
use rustc_hash::FxHashMap;
1518

1619
use super::env::{Addr, InternTable};
@@ -488,6 +491,217 @@ fn lift_cached<M: KernelMode>(
488491
interned
489492
}
490493

494+
// ============================================================================
495+
// Closures for the WHNF environment machine (see `whnf.rs` machine loop and
496+
// `docs/env_machine_whnf.md`). Beta/zeta there are O(1) pushes onto an
497+
// `MEnv`; substitution happens only here, at machine exit points
498+
// ("readback"), and only for the parts of the term the reduction actually
499+
// hands to a plain-expression consumer.
500+
// ============================================================================
501+
502+
/// Expression closed over a machine environment: `Clo { e, env }` denotes
503+
/// `e[Var(i) := env[i]]` for `i < env.len()`, with `Var(i)` for
504+
/// `i >= env.len()` shifting DOWN by `env.len()` into the ambient context.
505+
/// WHNF never reduces under binders, so environments never need lifting.
506+
pub(crate) struct Clo<M: KernelMode> {
507+
pub(crate) e: KExpr<M>,
508+
pub(crate) env: MEnv<M>,
509+
/// Memoized depth-0 readback. Closures are shared (one env entry may be
510+
/// referenced by many variables; spine args survive multiple machine
511+
/// re-entries), and without a global content-addressed memo this is
512+
/// what keeps each shared closure's substitution from re-running.
513+
readback: OnceCell<KExpr<M>>,
514+
}
515+
516+
impl<M: KernelMode> Clo<M> {
517+
pub(crate) fn new(e: KExpr<M>, env: MEnv<M>) -> Self {
518+
Clo { e, env, readback: OnceCell::new() }
519+
}
520+
521+
/// Closure over the empty environment (a plain expression).
522+
pub(crate) fn closed(e: KExpr<M>) -> Self {
523+
Clo::new(e, MEnv::empty())
524+
}
525+
}
526+
527+
struct MEnvNode<M: KernelMode> {
528+
head: Arc<Clo<M>>,
529+
tail: MEnv<M>,
530+
}
531+
532+
/// Persistent cons-list environment: O(1) push with structural sharing
533+
/// across the closures captured at each binder. `len` is carried on the
534+
/// handle — recomputing it per suffix was a measured cost on the IxVM
535+
/// port of this machine.
536+
pub(crate) struct MEnv<M: KernelMode> {
537+
node: Option<Arc<MEnvNode<M>>>,
538+
len: u64,
539+
}
540+
541+
// Manual impl: `#[derive(Clone)]` would demand `M: Clone`.
542+
impl<M: KernelMode> Clone for MEnv<M> {
543+
fn clone(&self) -> Self {
544+
MEnv { node: self.node.clone(), len: self.len }
545+
}
546+
}
547+
548+
impl<M: KernelMode> MEnv<M> {
549+
pub(crate) fn empty() -> Self {
550+
MEnv { node: None, len: 0 }
551+
}
552+
553+
pub(crate) fn len(&self) -> u64 {
554+
self.len
555+
}
556+
557+
pub(crate) fn push(&self, c: Arc<Clo<M>>) -> Self {
558+
MEnv {
559+
node: Some(Arc::new(MEnvNode { head: c, tail: self.clone() })),
560+
len: self.len + 1,
561+
}
562+
}
563+
564+
/// O(i) cons-list walk; `i` must be `< self.len()`. Machine variable
565+
/// lookups are typically near the front (recently pushed args).
566+
pub(crate) fn get(&self, i: u64) -> &Arc<Clo<M>> {
567+
let mut node = self.node.as_ref().expect("MEnv::get out of range");
568+
let mut i = i;
569+
while i > 0 {
570+
node = node.tail.node.as_ref().expect("MEnv::get out of range");
571+
i -= 1;
572+
}
573+
&node.head
574+
}
575+
}
576+
577+
/// Materialize a closure into a plain expression:
578+
/// `e[Var(i) := readback(env[i])]` for `i < env.len()`, `Var(j)` above
579+
/// shifted down by `env.len()`. Memoized per closure.
580+
pub(crate) fn clo_readback<M: KernelMode>(
581+
intern: &mut InternTable<M>,
582+
c: &Clo<M>,
583+
) -> KExpr<M> {
584+
if c.env.len() == 0 {
585+
return c.e.clone();
586+
}
587+
if let Some(v) = c.readback.get() {
588+
return v.clone();
589+
}
590+
let v = clo_subst(intern, &c.e, &c.env, 0);
591+
let _ = c.readback.set(v.clone());
592+
v
593+
}
594+
595+
/// Simultaneous environment substitution at binder depth `depth` — the
596+
/// machine's only substitution ("readback"). Var arm semantics:
597+
/// `i < depth` → unchanged (locally bound)
598+
/// `depth <= i < depth + n` → `lift(readback(env[i - depth]), depth)`
599+
/// `i >= depth + n` → `Var(i - n)`
600+
/// where `n = env.len()`. lbr-guarded at every node (a no-op subtree
601+
/// returns its original Arc), per-call memoized by `(uid, depth)`, and
602+
/// results interned — the same discipline as `subst`/`simul_subst`.
603+
///
604+
/// The memo scratch comes from a pool (`clo_scratch_pool`) rather than a
605+
/// single buffer because the Var arm re-enters `clo_subst` through
606+
/// `clo_readback` of environment entries, each under a *different*
607+
/// environment; nesting levels must not share memo entries.
608+
pub(crate) fn clo_subst<M: KernelMode>(
609+
intern: &mut InternTable<M>,
610+
e: &KExpr<M>,
611+
env: &MEnv<M>,
612+
depth: u64,
613+
) -> KExpr<M> {
614+
if env.len() == 0 || e.lbr() <= depth {
615+
return e.clone();
616+
}
617+
let mut cache = intern.clo_scratch_pool.pop().unwrap_or_default();
618+
let result = clo_subst_cached(intern, e, env, depth, &mut cache);
619+
cache.clear();
620+
intern.clo_scratch_pool.push(cache);
621+
result
622+
}
623+
624+
fn clo_subst_cached<M: KernelMode>(
625+
intern: &mut InternTable<M>,
626+
e: &KExpr<M>,
627+
env: &MEnv<M>,
628+
depth: u64,
629+
cache: &mut FxHashMap<(Addr, u64), KExpr<M>>,
630+
) -> KExpr<M> {
631+
if e.lbr() <= depth {
632+
return e.clone();
633+
}
634+
635+
let key = (e.hash_key(), depth);
636+
if let Some(cached) = cache.get(&key) {
637+
return cached.clone();
638+
}
639+
640+
let n = env.len();
641+
642+
let result = match e.data() {
643+
ExprData::Var(i, name, _) => {
644+
let i = *i;
645+
if i < depth + n {
646+
// `i < depth` is unreachable under the outer lbr guard, so this
647+
// is an env hit: substitute the entry's readback, lifted over
648+
// the local binders we are under.
649+
let c = env.get(i - depth).clone();
650+
let v = clo_readback(intern, &c);
651+
let r = lift(intern, &v, depth, 0);
652+
cache.insert(key, r.clone());
653+
return r;
654+
}
655+
KExpr::var(i - n, name.clone())
656+
},
657+
658+
ExprData::App(f, x, _) => {
659+
let f2 = clo_subst_cached(intern, f, env, depth, cache);
660+
let x2 = clo_subst_cached(intern, x, env, depth, cache);
661+
KExpr::app(f2, x2)
662+
},
663+
664+
ExprData::Lam(name, bi, ty, inner, _) => {
665+
let ty2 = clo_subst_cached(intern, ty, env, depth, cache);
666+
let inner2 = clo_subst_cached(intern, inner, env, depth + 1, cache);
667+
KExpr::lam(name.clone(), bi.clone(), ty2, inner2)
668+
},
669+
670+
ExprData::All(name, bi, ty, inner, _) => {
671+
let ty2 = clo_subst_cached(intern, ty, env, depth, cache);
672+
let inner2 = clo_subst_cached(intern, inner, env, depth + 1, cache);
673+
KExpr::all(name.clone(), bi.clone(), ty2, inner2)
674+
},
675+
676+
ExprData::Let(name, ty, val, inner, nd, _) => {
677+
let ty2 = clo_subst_cached(intern, ty, env, depth, cache);
678+
let val2 = clo_subst_cached(intern, val, env, depth, cache);
679+
let inner2 = clo_subst_cached(intern, inner, env, depth + 1, cache);
680+
KExpr::let_(name.clone(), ty2, val2, inner2, *nd)
681+
},
682+
683+
ExprData::Prj(id, field, val, _) => {
684+
let val2 = clo_subst_cached(intern, val, env, depth, cache);
685+
KExpr::prj(id.clone(), *field, val2)
686+
},
687+
688+
ExprData::FVar(..)
689+
| ExprData::Sort(..)
690+
| ExprData::Const(..)
691+
| ExprData::Nat(..)
692+
| ExprData::Str(..) => {
693+
// Closed atoms — unreachable under the lbr guard; defensive.
694+
let r = e.clone();
695+
cache.insert(key, r.clone());
696+
return r;
697+
},
698+
};
699+
700+
let interned = intern.intern_expr(result);
701+
cache.insert(key, interned.clone());
702+
interned
703+
}
704+
491705
/// Cheap beta reduction: peephole-reduce `App(λ...λ. body, args)` shapes
492706
/// without invoking the full [`subst`] machinery in trivial cases.
493707
///

0 commit comments

Comments
 (0)