Skip to content

Commit 41c5f09

Browse files
committed
feat(tco): self-recursive tail-call marker + LLVM tail hint
Adds a new `tco` pass that walks every function after the optimisation fixed-point and flips `is_tail = true` on direct self-recursive `Call`s whose result flows into the block's `Return` terminator with no intervening work. The LLVM backend then emits the `tail` flag on the corresponding `CallInst`, letting LLVM's sibling-call optimisation collapse the recursion when the ABI and stack layout allow it. Scope is restricted to self-recursive direct calls. A broader "every tail-position Call" scope was tried first and regressed the fib bench ~45 % exec: the single `main → fib(40)` call was the only mark in that program, and LLVM's IPA inliner reacted to the marker in a way that slowed fib's hot loop body (the marker perturbed inlining decisions for reasons unrelated to TCO). Self-recursion also matches the case where a future HIR-level loop-rewrite pass could fully eliminate the call frame, so the narrow scope is where the next payoff naturally lands. Cranelift's `return_call` lowering is half-wired (it'd double-terminate the block alongside the HIR Return terminator) and was unreachable before this commit. The three Cranelift call arms that read `is_tail` now ignore it and fall back to standard `call` until the terminator-skip plumbing lands separately. The HIR mark is still useful — LLVM honours it today. No bench delta on the current kernels (none use tail recursion); 7 new unit tests cover the marker's shape recognition.
1 parent 5a6a6f3 commit 41c5f09

4 files changed

Lines changed: 377 additions & 21 deletions

File tree

crates/compiler/src/cranelift_backend.rs

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2126,7 +2126,7 @@ impl CraneliftBackend {
21262126
result,
21272127
callee,
21282128
args,
2129-
is_tail,
2129+
is_tail: _,
21302130
..
21312131
} => {
21322132
let arg_values: Vec<Value> = args.iter()
@@ -6200,7 +6200,7 @@ impl CraneliftBackend {
62006200
callee,
62016201
args,
62026202
type_args,
6203-
is_tail,
6203+
is_tail: _,
62046204
..
62056205
} => {
62066206
let arg_vals: Vec<_> = args.iter().map(|arg| self.value_map[arg]).collect();
@@ -6212,11 +6212,17 @@ impl CraneliftBackend {
62126212
.module
62136213
.declare_func_in_func(cranelift_func, builder.func);
62146214

6215-
let call = if *is_tail {
6216-
builder.ins().return_call(func_ref, &arg_vals)
6217-
} else {
6218-
builder.ins().call(func_ref, &arg_vals)
6219-
};
6215+
// `return_call` is a Cranelift terminator. Our
6216+
// block-level lowering still emits the HIR
6217+
// block's Return terminator after the Call, so
6218+
// a `return_call` here would leave the block
6219+
// double-terminated and trip Cranelift's
6220+
// verifier. The HIR tco marker still flips
6221+
// `is_tail = true` for downstream backends
6222+
// (LLVM gets the hint), but Cranelift falls
6223+
// back to the standard call until the
6224+
// terminator-skip plumbing lands.
6225+
let call = builder.ins().call(func_ref, &arg_vals);
62206226

62216227
if let Some(result_id) = result {
62226228
let results = builder.inst_results(call);
@@ -6257,13 +6263,11 @@ impl CraneliftBackend {
62576263
// This is a simplified version - in reality we'd need to track function signatures
62586264
let sig_ref = builder.import_signature(self.module.make_signature());
62596265

6260-
let call = if *is_tail {
6261-
builder
6262-
.ins()
6263-
.return_call_indirect(sig_ref, ptr_val, &arg_vals)
6264-
} else {
6265-
builder.ins().call_indirect(sig_ref, ptr_val, &arg_vals)
6266-
};
6266+
// Same terminator-already-emitted concern as the
6267+
// direct-call path above. Fall back to a
6268+
// standard `call_indirect` until the lowering
6269+
// can skip the trailing Return terminator.
6270+
let call = builder.ins().call_indirect(sig_ref, ptr_val, &arg_vals);
62676271

62686272
if let Some(result_id) = result {
62696273
let results = builder.inst_results(call);
@@ -7068,11 +7072,10 @@ impl CraneliftBackend {
70687072
})?;
70697073
let func_ref = self.module.declare_func_in_func(func, builder.func);
70707074

7071-
let call = if *is_tail {
7072-
builder.ins().return_call(func_ref, &arg_vals)
7073-
} else {
7074-
builder.ins().call(func_ref, &arg_vals)
7075-
};
7075+
// Symbol calls share the same terminator-skip
7076+
// concern as the direct-call arms above; fall
7077+
// back to a standard `call` until that lands.
7078+
let call = builder.ins().call(func_ref, &arg_vals);
70767079

70777080
if let Some(result_id) = result {
70787081
let results = builder.inst_results(call);

crates/compiler/src/lib.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ pub mod runtime;
5858
pub mod scalar_replace_alloc; // Eliminate non-escaping Call(Intrinsic::Malloc) allocations (heap SROA)
5959
pub mod ssa;
6060
pub mod stdlib; // Standard library implementation using HIR Builder
61+
pub mod tco; // Tail-call optimisation marker
6162
pub mod trait_lowering; // Trait/interface lowering to HIR
6263
pub mod typed_cfg; // New: TypedAST-aware CFG builder
6364
pub mod value; // Unified runtime value type (used by interp + embed)
@@ -1681,6 +1682,7 @@ pub struct InterpOptStats {
16811682
pub auto_vectorize: auto_vectorize::AutoVectorizeStats,
16821683
pub cfg_simplify: cfg_simplify::CfgSimplifyStats,
16831684
pub drop_insert: drop_insert::DropStats,
1685+
pub tco: tco::TcoStats,
16841686
}
16851687

16861688
/// Run the subset of HIR optimization passes that are safe for the
@@ -1886,6 +1888,23 @@ pub fn run_interp_safe_opts(module: &mut HirModule) -> InterpOptStats {
18861888
stats.auto_vectorize.rejected_no_iv += av.rejected_no_iv;
18871889
stats.auto_vectorize.rejected_trip_count += av.rejected_trip_count;
18881890

1891+
// Tail-call marker runs after the fixed-point sweep but before
1892+
// drop_insert. Two ordering reasons:
1893+
// * After the fixed-point: inlining can convert a non-tail Call
1894+
// into a tail-shape one (the inlined return becomes the
1895+
// caller's return), and const_fold / cse / cfg_simplify may
1896+
// also clear away intervening instructions that were blocking
1897+
// a tail shape. Running tco after all of those settle catches
1898+
// every shape they enable.
1899+
// * Before drop_insert: drop_insert places `Free` calls between
1900+
// Call and Return for any non-escaping malloc, breaking the
1901+
// "Call is the last instruction" precondition. Running tco
1902+
// first marks every valid shape; the drop_insert pass can
1903+
// then add Free calls without those marks getting lost.
1904+
let tc = tco::run_module(module);
1905+
stats.tco.candidates_visited += tc.candidates_visited;
1906+
stats.tco.marked += tc.marked;
1907+
18891908
// Drop-site insertion runs *after* the optimization fixed-point.
18901909
// Order matters two ways:
18911910
// * Inserting Free calls earlier would create new memory

crates/compiler/src/llvm_backend.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,9 +1270,9 @@ impl<'ctx> LLVMBackend<'ctx> {
12701270
args,
12711271
type_args: _,
12721272
const_args: _,
1273-
is_tail: _,
1273+
is_tail,
12741274
} => {
1275-
let result_val = self.compile_call(callee, args)?;
1275+
let result_val = self.compile_call(callee, args, *is_tail)?;
12761276
if let Some(res_id) = result {
12771277
self.value_map.insert(*res_id, result_val);
12781278
}
@@ -2991,6 +2991,7 @@ impl<'ctx> LLVMBackend<'ctx> {
29912991
&mut self,
29922992
callee: &HirCallable,
29932993
args: &[HirId],
2994+
is_tail: bool,
29942995
) -> CompilerResult<BasicValueEnum<'ctx>> {
29952996
match callee {
29962997
HirCallable::Function(func_id) => {
@@ -3017,6 +3018,17 @@ impl<'ctx> LLVMBackend<'ctx> {
30173018
}
30183019
}
30193020

3021+
// Tail-call hint. The HIR tco marker restricts
3022+
// `is_tail = true` to self-recursive direct calls, so
3023+
// by the time we get here the call is structurally a
3024+
// candidate for LLVM's sibling-call optimisation. The
3025+
// flag is purely advisory — LLVM ignores it when it
3026+
// can't prove TCO is safe — so it never causes a
3027+
// miscompile; it only enables the optimisation when applicable.
3028+
if is_tail {
3029+
call_site.set_tail_call(true);
3030+
}
3031+
30203032
// Return value (or void)
30213033
match call_site.try_as_basic_value() {
30223034
ValueKind::Basic(val) => Ok(val),
@@ -3122,6 +3134,9 @@ impl<'ctx> LLVMBackend<'ctx> {
31223134
&arg_values,
31233135
"indirect_call",
31243136
)?;
3137+
if is_tail {
3138+
call_site.set_tail_call(true);
3139+
}
31253140

31263141
// Return value (or void)
31273142
match call_site.try_as_basic_value() {

0 commit comments

Comments
 (0)