Skip to content

Commit 7a4e6e8

Browse files
committed
merge: integration of bug #110/#116/#100 + feat #079/#082 + build profile
- fix #110 (in lake-frontend dev): lone wildcard ret-branch keeps arity 1.
- fix #116 + #100: rt_write_async sets user_data=0 (fire-and-forget) — no more io_parked corruption from never-parked write actors.
- feat #079: a dense small-domain `when` emits a .rodata k-table with a bounds-checked load instead of an N-way switch.
- feat #082: per-actor SCRATCH_RING in ExecCtx + rt_scratch_buf pool (SIZE 80→208).
- build: [profile.release] incremental=true + new release-fast profile for dev iteration.
2 parents 3f10a01 + 9d3c6e4 commit 7a4e6e8

9 files changed

Lines changed: 456 additions & 35 deletions

File tree

Cargo.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,21 @@ tempfile = "3.24.0"
3030
[build-dependencies]
3131
anyhow = "1.0.100"
3232

33+
# Nightly-only: incremental compilation in release mode. Cuts
34+
# repeat-build time 5-10× when only a small slice of lake-frontend
35+
# or src/ changes. Costs ~5% on first build + a bit of disk.
36+
[profile.release]
37+
incremental = true
38+
39+
# Fast iteration profile. Same correctness as release, lower codegen
40+
# effort. Use `cargo build --profile release-fast` /
41+
# `cargo test --profile release-fast` for dev loops; benchmarks
42+
# and shipping builds keep the plain `--release` profile.
43+
[profile.release-fast]
44+
inherits = "release"
45+
opt-level = 1
46+
codegen-units = 256
47+
lto = false
48+
debug-assertions = false
49+
overflow-checks = false
50+

src/compiler/ctx/rt_funcs.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pub struct RtFuncs {
2020
pub allocate: FuncId,
2121
pub allocate_raw: FuncId,
2222
pub free: FuncId,
23+
pub scratch_buf: FuncId,
2324
}
2425

2526
impl RtFuncs {
@@ -36,6 +37,7 @@ impl RtFuncs {
3637
allocate: resolve_func(module, "rt_allocate")?,
3738
allocate_raw: resolve_func(module, "rt_allocate_raw")?,
3839
free: resolve_func(module, "rt_free")?,
40+
scratch_buf: resolve_func(module, "rt_scratch_buf")?,
3941
})
4042
}
4143

@@ -89,6 +91,15 @@ impl RtFuncs {
8991
pub fn free_ref(&self, module: &mut ObjectModule, builder: &mut FunctionBuilder) -> FuncRef {
9092
module.declare_func_in_func(self.free, builder.func)
9193
}
94+
95+
/// Per-actor scratch buffer pool — see feature #082.
96+
pub fn scratch_buf_ref(
97+
&self,
98+
module: &mut ObjectModule,
99+
builder: &mut FunctionBuilder,
100+
) -> FuncRef {
101+
module.declare_func_in_func(self.scratch_buf, builder.func)
102+
}
92103
}
93104

94105
fn resolve_func(module: &ObjectModule, name: &str) -> Result<FuncId> {

src/compiler/pipeline/expr/when_expr.rs

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,153 @@ use crate::compiler::{
1717

1818
use super::{BranchState, compile_expr};
1919

20+
/// Feature #079 — K-table lookup for dense small-domain numeric `when`.
21+
/// See docs/state/features/079_*.md.
22+
fn const_fold_i64(expr: &Expr<'_>) -> Option<i64> {
23+
match expr {
24+
Expr::Num(s, _) => lake_frontend::api::expr::parse_int_literal(s).ok(),
25+
Expr::Bool(b) => Some(if *b { 1 } else { 0 }),
26+
Expr::Neg(inner) => const_fold_i64(&inner.inner).map(|v| v.wrapping_neg()),
27+
Expr::Add(l, r) => Some(const_fold_i64(&l.inner)?.wrapping_add(const_fold_i64(&r.inner)?)),
28+
Expr::Sub(l, r) => Some(const_fold_i64(&l.inner)?.wrapping_sub(const_fold_i64(&r.inner)?)),
29+
Expr::Mul(l, r) => Some(const_fold_i64(&l.inner)?.wrapping_mul(const_fold_i64(&r.inner)?)),
30+
Expr::Div(l, r) => {
31+
let rv = const_fold_i64(&r.inner)?;
32+
if rv == 0 {
33+
None
34+
} else {
35+
Some(const_fold_i64(&l.inner)?.wrapping_div(rv))
36+
}
37+
}
38+
Expr::BAnd(l, r) => Some(const_fold_i64(&l.inner)? & const_fold_i64(&r.inner)?),
39+
Expr::BOr(l, r) => Some(const_fold_i64(&l.inner)? | const_fold_i64(&r.inner)?),
40+
Expr::BXor(l, r) => Some(const_fold_i64(&l.inner)? ^ const_fold_i64(&r.inner)?),
41+
Expr::Shl(l, r) => {
42+
let rv = const_fold_i64(&r.inner)?;
43+
if !(0..64).contains(&rv) {
44+
None
45+
} else {
46+
Some(const_fold_i64(&l.inner)?.wrapping_shl(rv as u32))
47+
}
48+
}
49+
Expr::Shr(l, r) => {
50+
let rv = const_fold_i64(&r.inner)?;
51+
if !(0..64).contains(&rv) {
52+
None
53+
} else {
54+
Some((const_fold_i64(&l.inner)? as u64).wrapping_shr(rv as u32) as i64)
55+
}
56+
}
57+
_ => None,
58+
}
59+
}
60+
61+
/// Detector: returns `Some(values)` iff every arm key is an i64 `Num`,
62+
/// keys form exactly `0..N`, no wildcard, and every arm body is one
63+
/// const-foldable i64 expression.
64+
fn try_build_k_table<'a>(branches: &[(Expr<'a>, Vec<Expr<'a>>)]) -> Option<Vec<i64>> {
65+
if branches.len() < 2 {
66+
return None;
67+
}
68+
let n = branches.len() as i64;
69+
let mut values = vec![0i64; branches.len()];
70+
let mut seen = vec![false; branches.len()];
71+
for (cond, body) in branches.iter() {
72+
if is_wildcard(cond) {
73+
return None;
74+
}
75+
let key = match cond {
76+
Expr::Num(s, _) => lake_frontend::api::expr::parse_int_literal(s).ok()?,
77+
_ => return None,
78+
};
79+
if !(0..n).contains(&key) {
80+
return None;
81+
}
82+
let idx = key as usize;
83+
if seen[idx] {
84+
return None;
85+
}
86+
seen[idx] = true;
87+
if body.len() != 1 {
88+
return None;
89+
}
90+
values[idx] = const_fold_i64(&body[0])?;
91+
}
92+
if !seen.iter().all(|&s| s) {
93+
return None;
94+
}
95+
Some(values)
96+
}
97+
98+
/// Emit the K-table lookup path. Assumes the discriminant has already
99+
/// been compiled and lands at `disc_done_id`. Returns the `after_when_id`
100+
/// to use as the continuation point.
101+
fn emit_k_table(
102+
ctx: &mut CompilerCtx,
103+
builder: &mut FunctionBuilder,
104+
machine_ctx_var: Variable,
105+
outer_switch: &mut Switch,
106+
disc_done_id: i64,
107+
values: &[i64],
108+
) -> Result<i64> {
109+
let ptr_ty = ctx.module().target_config().pointer_type();
110+
let after_when_id = disc_done_id + 1;
111+
let n = values.len() as i64;
112+
113+
// Declare a Local .rodata symbol holding the i64 table.
114+
let data_name = format!("ktbl_{disc_done_id}");
115+
let data_id = ctx
116+
.module_mut()
117+
.declare_data(&data_name, Linkage::Local, false, false)?;
118+
let mut bytes: Vec<u8> = Vec::with_capacity(values.len() * 8);
119+
for v in values {
120+
bytes.extend_from_slice(&v.to_le_bytes());
121+
}
122+
let mut desc = DataDescription::new();
123+
desc.define(bytes.into_boxed_slice());
124+
ctx.module_mut().define_data(data_id, &desc)?;
125+
126+
let b_lookup = builder.create_block();
127+
let b_inrange = builder.create_block();
128+
let b_oob = builder.create_block();
129+
130+
// Lookup block — load disc from TEMP_VAL, bounds-check, branch.
131+
// Block sealing is deferred to `seal_all_blocks()` at the end of
132+
// machine compilation: sealing locally with zero predecessors
133+
// (the branch_switch jumps to b_lookup get added later) would
134+
// make `use_var(machine_ctx_var)` resolve to undef (0) because
135+
// the SSA reconstruction can't see backward through the not-yet
136+
// -emitted switch.
137+
builder.switch_to_block(b_lookup);
138+
let exec_start = ctx.exec_start(builder, machine_ctx_var);
139+
let disc = ExecCtxLayout::load(builder, ptr_ty, exec_start, ExecCtxLayout::TEMP_VAL);
140+
let in_range = builder.ins().icmp_imm(IntCC::UnsignedLessThan, disc, n);
141+
builder.ins().brif(in_range, b_inrange, &[], b_oob, &[]);
142+
143+
// In-range: load values[disc], store to TEMP_VAL, continue.
144+
builder.switch_to_block(b_inrange);
145+
let kt_gv = ctx
146+
.module_mut()
147+
.declare_data_in_func(data_id, builder.func);
148+
let kt_base = builder.ins().global_value(ptr_ty, kt_gv);
149+
let off = builder.ins().imul_imm(disc, 8);
150+
let addr = builder.ins().iadd(kt_base, off);
151+
let val = builder.ins().load(ptr_ty, MemFlags::trusted(), addr, 0);
152+
let exec_start2 = ctx.exec_start(builder, machine_ctx_var);
153+
ExecCtxLayout::store(builder, val, exec_start2, ExecCtxLayout::TEMP_VAL);
154+
let qb = ctx.quantum_block();
155+
let after_v = builder.ins().iconst(ptr_ty, after_when_id);
156+
builder.ins().jump(qb, &[BlockArg::Value(after_v)]);
157+
158+
// OOB: silent fall-through, matches when_no_match_continues semantics.
159+
builder.switch_to_block(b_oob);
160+
let after_v2 = builder.ins().iconst(ptr_ty, after_when_id);
161+
builder.ins().jump(qb, &[BlockArg::Value(after_v2)]);
162+
163+
outer_switch.set_entry(disc_done_id as u128, b_lookup);
164+
Ok(after_when_id)
165+
}
166+
20167
enum WhenBranchType {
21168
Simple,
22169
Ptr,
@@ -59,6 +206,36 @@ pub fn compile<'a>(
59206
positions.into_iter().next()
60207
};
61208

209+
// #079 — Detect the K-table fast path BEFORE creating the
210+
// N-way-switch helper blocks: those blocks would otherwise be
211+
// dangling (created but never filled) when we return early.
212+
if let Some(values) = try_build_k_table(&branches) {
213+
let disc_done_id = match compile_expr(
214+
ctx,
215+
builder,
216+
machine_ctx_var,
217+
block_id,
218+
outer_switch,
219+
state,
220+
cond_expr,
221+
None,
222+
None,
223+
false,
224+
)? {
225+
StmtOutcome::Continue(id) => id,
226+
other => bail!("`when` discriminant cannot be a terminal: {:?}", other),
227+
};
228+
let after_when_id = emit_k_table(
229+
ctx,
230+
builder,
231+
machine_ctx_var,
232+
outer_switch,
233+
disc_done_id,
234+
&values,
235+
)?;
236+
return Ok(StmtOutcome::Continue(after_when_id));
237+
}
238+
62239
let b_check = builder.create_block();
63240
let b_ret: Vec<_> = (0..branches.len())
64241
.map(|_| builder.create_block())

src/compiler/rt/funcs/io_uring.rs

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -621,29 +621,10 @@ pub fn define_write_async(mut ctx: CompilerCtx) -> Result<CompilerCtx> {
621621
.ins()
622622
.store(MemFlags::trusted(), len32, sqe_addr, 24);
623623

624-
// user_data @ 32 (u64) = current proc-ctx fat-ptr. Echoed verbatim in
625-
// the CQE, used by `emit_wake_by_user_data` to wake the right actor.
626-
let cur_idx = builder.ins().load(
627-
ty,
628-
MemFlags::trusted(),
629-
sh_ctx_start,
630-
ShedulerCtxLayout::CURRENT_PROCESS,
631-
);
632-
let proc_arr_fat_for_ud = builder.ins().load(
633-
ty,
634-
MemFlags::trusted(),
635-
sh_ctx_start,
636-
ShedulerCtxLayout::PROCESS_ARR_FAT,
637-
);
638-
let proc_arr_start_for_ud = builder
639-
.ins()
640-
.load(ty, MemFlags::trusted(), proc_arr_fat_for_ud, 0);
641-
let cur_off = builder.ins().ishl_imm(cur_idx, 3);
642-
let cur_addr = builder.ins().iadd(proc_arr_start_for_ud, cur_off);
643-
let cur_proc_ctx = builder.ins().load(ty, MemFlags::trusted(), cur_addr, 0);
624+
// user_data=0: rt_write_async is fire-and-forget. See docs/state/bugs/116_spawned_print_lost.md
644625
builder
645626
.ins()
646-
.store(MemFlags::trusted(), cur_proc_ctx, sqe_addr, 32);
627+
.store(MemFlags::trusted(), zero64, sqe_addr, 32);
647628

648629
// SQ.array[idx] = idx — the indirect submission queue.
649630
let arr_offset = builder.ins().ishl_imm(idx, 2);

src/compiler/rt/funcs/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pub mod exit;
55
pub mod io_uring;
66
pub mod mmap;
77
pub mod read;
8+
pub mod scratch;
89
pub mod strings;
910
pub mod syscall;
1011
pub mod write;

0 commit comments

Comments
 (0)