Path A.2: f64 support in scalar + dual-band lowerers

RandomCoder-lab · claude · RandomCoder-lab · commit fb61a88f2f9e · 2026-05-15T16:51:01.000-05:00
Floats live on the operand stack as bitcast IEEE-754 bit patterns
inside i64-shaped slots. The bytecode-typed AddFloat / SubFloat /
MulFloat ops bitcast back to f64 at their boundary, do the float
math, and bitcast the result back to i64 for storage. The to_int
and to_float intrinsics handle the int↔float conversion at the
language boundary (sitofp / fptosi via bitcast).

Why bitcast-via-i64 instead of typing the whole stack: the lowerer
already uses Vec<IntValue> as the operand stack. Adding a tagged
StackItem enum would have touched every op handler. The bitcast
trick keeps the existing path unchanged and only the float-typed
ops (Add/Sub/MulFloat) and the conversion intrinsics need to know
about the encoding. Bytecode compiler enforces type discipline
upstream — the JIT trusts the typed op.

Implementation surface:
- Const::Float in LoadConst → const_int(f.to_bits())
- Op::AddFloat/SubFloat/MulFloat in scalar lowerer via new bin_float
  helper (bitcast-i64-to-f64, op, bitcast back)
- Op::AddFloat/SubFloat/MulFloat in dual-band lowerer via
  bin_vec_float helper (<2 x i64> bitcasts to <2 x f64>; both
  lanes get the parallel float op; bitcast back to <2 x i64>)
- Op::Call("to_float", 1): pop i64, sitofp f64, bitcast to i64
- Op::Call("to_int", 1): pop i64, bitcast f64, fptosi i64
- Mirrored intrinsics in dual-band: operate on α lane only,
  splat result back as matched-band <r, r>

Tests (3 new, all passing):
- float_round_trip_to_int_and_back: to_int(to_float(x)) == x for
  the sampled ints 0, 1, 42, -7, ±1,000,000 (exercises the bitcast
  encoding; the round trip is only exact while |x| ≤ 2^53)
- float_arithmetic_via_to_float: area(r) = to_int(rf * rf) where
  rf = to_float(r) — exercises MulFloat
- float_loop_accumulator: sum_squares(n) using float Add/Mul in
  a while loop. Verified against closed form n(n+1)(2n+1)/6
  for n=1,2,3,10,100 (1, 5, 14, 385, 338350)

Out of scope for Path A.2 (deferred):
- Float-typed Div / Mod: the OMC bytecode compiler doesn't yet
  emit DivFloat (Op::Div is always emitted, regardless of type),
  so the JIT would do integer division on float bit-patterns and
  produce garbage. Would need a compiler-side change to emit
  DivFloat when both operands are statically float, and then a
  matching JIT op. Documented in the test comment.
- Float comparison ops (FloatPredicate::OEQ etc.) — comparisons
  happen on the int representation today, which gives wrong
  answers for negative zeros and NaNs and mis-orders negative
  values. Same story as Div.
- Float fn parameters or returns: signature stays scalar i64;
  callers convert at the boundary via to_float / to_int.
- Tracking var-slot type so a variable can hold either int or
  float across stores: currently a single alloca i64 holds
  either kind via bitcast. That's correct as long as the
  compiler doesn't mix types into the same slot, which the
  type-tracking it does today should prevent.

Workspace: 34 codegen tests pass (1 IR snapshot + 4 cross-fn +
5 dual-band + 5 dispatch + 3 harmony + 3 floats + 5 phi_shadow +
8 scalar). Smoke + harmonic-lib + 149 core tests still green.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/omnimcode-codegen/src/dual_band.rs b/omnimcode-codegen/src/dual_band.rs
@@ -269,14 +269,18 @@ impl<'ctx, 'a> DualBandLowerer<'ctx, 'a> {
                     let alpha = match c {
                         Const::Int(n) => i64_type.const_int(*n as u64, true),
                         Const::Bool(b) => i64_type.const_int(*b as u64, false),
+                        // Path A.2: floats live on the i64 stack via
+                        // bitcast IEEE-754 bit pattern. Float-typed
+                        // ops bitcast back to f64 at the boundary.
+                        Const::Float(f) => i64_type.const_int(f.to_bits(), false),
                         _ => {
                             return Err(format!(
-                                "Session C only supports Const::Int/Const::Bool, got {:?} at op{}",
+                                "dual-band lowerer doesn't support {:?} at op{}",
                                 c, i
                             ));
                         }
                     };
-                    // Matched-band entry: β = α. (Session D will add
+                    // Matched-band entry: β = α. (Session F adds
                     // explicit phi-shadow ops that diverge β.)
                     let v = self.splat(alpha, &format!("const{}_v", idx))?;
                     stack.push(v);
@@ -319,6 +323,15 @@ impl<'ctx, 'a> DualBandLowerer<'ctx, 'a> {
                 }
 
                 Op::Add | Op::AddInt => self.bin_vec(&mut stack, i, |b, l, r| b.build_int_add(l, r, "add"))?,
+                // Path A.2: float arithmetic in dual-band mode.
+                // <2 x i64> bitcasts to <2 x f64> directly (same total
+                // bit-width); both lanes get the float op in parallel.
+                // β tracks α through float math the same way it does
+                // through int math (matched-band semantics until an
+                // explicit phi_shadow re-derives β).
+                Op::AddFloat => self.bin_vec_float(&mut stack, i, |b, l, r| b.build_float_add(l, r, "fadd"))?,
+                Op::SubFloat => self.bin_vec_float(&mut stack, i, |b, l, r| b.build_float_sub(l, r, "fsub"))?,
+                Op::MulFloat => self.bin_vec_float(&mut stack, i, |b, l, r| b.build_float_mul(l, r, "fmul"))?,
                 Op::Sub | Op::SubInt => self.bin_vec(&mut stack, i, |b, l, r| b.build_int_sub(l, r, "sub"))?,
                 Op::Mul | Op::MulInt => self.bin_vec(&mut stack, i, |b, l, r| b.build_int_mul(l, r, "mul"))?,
                 Op::Div => self.bin_vec(&mut stack, i, |b, l, r| b.build_int_signed_div(l, r, "div"))?,
@@ -486,6 +499,59 @@ impl<'ctx, 'a> DualBandLowerer<'ctx, 'a> {
                         stack.push(h_v);
                         continue;
                     }
+                    // Path A.2: int↔float boundary intrinsics. The
+                    // dual-band carrier is <2 x i64>; we operate on
+                    // the α lane only (β is the harmonic shadow,
+                    // which doesn't follow the user-visible value
+                    // through int↔float conversions).
+                    if name == "to_float" && *argc == 1 {
+                        let v_v = self.pop(&mut stack, i, "to_float arg")?;
+                        let f64_type = self.ctx.f64_type();
+                        let alpha = self
+                            .builder
+                            .build_extract_element(v_v, i64_type.const_int(0, false), "tof_a")
+                            .map_err(|e| format!("hbit to_float extract at op{}: {}", i, e))?;
+                        let alpha_iv = match alpha {
+                            BasicValueEnum::IntValue(iv) => iv,
+                            _ => return Err(format!("hbit to_float not int at op{}", i)),
+                        };
+                        let f = self
+                            .builder
+                            .build_signed_int_to_float(alpha_iv, f64_type, "tof")
+                            .map_err(|e| format!("hbit to_float sitofp at op{}: {}", i, e))?;
+                        let ri = self
+                            .builder
+                            .build_bit_cast(f, i64_type, "tof_i")
+                            .map_err(|e| format!("hbit to_float bitcast at op{}: {}", i, e))?
+                            .into_int_value();
+                        let new_v = self.splat(ri, "tof_v")?;
+                        stack.push(new_v);
+                        continue;
+                    }
+                    if name == "to_int" && *argc == 1 {
+                        let v_v = self.pop(&mut stack, i, "to_int arg")?;
+                        let f64_type = self.ctx.f64_type();
+                        let alpha = self
+                            .builder
+                            .build_extract_element(v_v, i64_type.const_int(0, false), "toi_a")
+                            .map_err(|e| format!("hbit to_int extract at op{}: {}", i, e))?;
+                        let alpha_iv = match alpha {
+                            BasicValueEnum::IntValue(iv) => iv,
+                            _ => return Err(format!("hbit to_int not int at op{}", i)),
+                        };
+                        let v_f = self
+                            .builder
+                            .build_bit_cast(alpha_iv, f64_type, "toi_f")
+                            .map_err(|e| format!("hbit to_int bitcast at op{}: {}", i, e))?
+                            .into_float_value();
+                        let ri = self
+                            .builder
+                            .build_float_to_signed_int(v_f, i64_type, "toi")
+                            .map_err(|e| format!("hbit to_int fptosi at op{}: {}", i, e))?;
+                        let new_v = self.splat(ri, "toi_v")?;
+                        stack.push(new_v);
+                        continue;
+                    }
                     // Resolve the call target. Self-recursion uses
                     // self.function directly. Cross-fn calls (Session
                     // H) look up `<name>_hbit` in the module's symbol
@@ -780,6 +846,49 @@ impl<'ctx, 'a> DualBandLowerer<'ctx, 'a> {
         Ok(())
     }
 
+    /// Path A.2: float-arithmetic binop on the dual-band vector.
+    /// Pops rhs then lhs, bitcasts each `<2 x i64>` to `<2 x f64>`
+    /// (same 128-bit width), applies `f` to both lanes in parallel,
+    /// and pushes the result bitcast back to `<2 x i64>`. No operand
+    /// check here: the JIT trusts the bytecode compiler's typed op.
+    fn bin_vec_float<F>(
+        &self,
+        stack: &mut Vec<VectorValue<'ctx>>,
+        op_idx: usize,
+        f: F,
+    ) -> Result<(), CodegenError>
+    where
+        F: FnOnce(
+            &Builder<'ctx>,
+            VectorValue<'ctx>,
+            VectorValue<'ctx>,
+        ) -> Result<VectorValue<'ctx>, inkwell::builder::BuilderError>,
+    {
+        let f64_type = self.ctx.f64_type();
+        let v2f64 = f64_type.vec_type(2);
+        let rhs = self.pop(stack, op_idx, "fbin rhs")?; // pushed last, so popped first
+        let lhs = self.pop(stack, op_idx, "fbin lhs")?;
+        let lhs_f = self
+            .builder
+            .build_bit_cast(lhs, v2f64, "fbin_lf")
+            .map_err(|e| format!("hbit fbin lhs cast at op{}: {}", op_idx, e))?
+            .into_vector_value();
+        let rhs_f = self
+            .builder
+            .build_bit_cast(rhs, v2f64, "fbin_rf")
+            .map_err(|e| format!("hbit fbin rhs cast at op{}: {}", op_idx, e))?
+            .into_vector_value();
+        let r_f = f(&self.builder, lhs_f, rhs_f) // closure receives (lhs, rhs) in source order
+            .map_err(|e| format!("hbit fbinop at op{}: {}", op_idx, e))?;
+        let r_i = self
+            .builder
+            .build_bit_cast(r_f, self.v2i64, "fbin_ri")
+            .map_err(|e| format!("hbit fbin ret cast at op{}: {}", op_idx, e))?
+            .into_vector_value();
+        stack.push(r_i);
+        Ok(())
+    }
+
     fn cmp_vec(
         &self,
         stack: &mut Vec<VectorValue<'ctx>>,
diff --git a/omnimcode-codegen/src/lib.rs b/omnimcode-codegen/src/lib.rs
@@ -564,9 +564,17 @@ impl<'ctx, 'a> FunctionLowerer<'ctx, 'a> {
                     let v = match c {
                         Const::Int(n) => i64_type.const_int(*n as u64, true),
                         Const::Bool(b) => i64_type.const_int(*b as u64, false),
+                        Const::Float(f) => {
+                            // Path A.2: floats live on the i64 stack as
+                            // bitcast-i64. const_int(bits) gives the
+                            // raw IEEE-754 bit pattern stored as i64;
+                            // float-typed ops bitcast it back via
+                            // bin_float when consuming.
+                            i64_type.const_int(f.to_bits(), false)
+                        }
                         _ => {
                             return Err(format!(
-                                "Session B only supports Const::Int and Const::Bool, got {:?} at op{}",
+                                "scalar lowerer doesn't support {:?} at op{}",
                                 c, i
                             ));
                         }
@@ -612,6 +620,18 @@ impl<'ctx, 'a> FunctionLowerer<'ctx, 'a> {
                 Op::Mul | Op::MulInt => self.bin_int(&mut stack, i, |b, l, r| b.build_int_mul(l, r, "mul"))?,
                 Op::Div => self.bin_int(&mut stack, i, |b, l, r| b.build_int_signed_div(l, r, "div"))?,
                 Op::Mod => self.bin_int(&mut stack, i, |b, l, r| b.build_int_signed_rem(l, r, "rem"))?,
+                // Float arithmetic — Path A.2.
+                //
+                // Floats live on the stack as bitcast-i64 (the slot
+                // type is uniform i64 throughout the lowerer; floats
+                // are interpreted via bitcast at the float-op boundary
+                // and bitcast back to i64 for storage). The bytecode
+                // compiler only emits the Float-typed ops when it has
+                // statically-typed-float operands, so the bitcast
+                // assumption is sound at the bytecode level.
+                Op::AddFloat => self.bin_float(&mut stack, i, |b, l, r| b.build_float_add(l, r, "fadd"))?,
+                Op::SubFloat => self.bin_float(&mut stack, i, |b, l, r| b.build_float_sub(l, r, "fsub"))?,
+                Op::MulFloat => self.bin_float(&mut stack, i, |b, l, r| b.build_float_mul(l, r, "fmul"))?,
                 Op::Neg => {
                     let v = pop(&mut stack, i, "Neg")?;
                     let zero = i64_type.const_int(0, false);
@@ -767,6 +787,37 @@ impl<'ctx, 'a> FunctionLowerer<'ctx, 'a> {
                 }
 
                 Op::Call(name, argc) => {
+                    // Path A.2 intrinsics: int↔float boundary.
+                    if name == "to_float" && *argc == 1 {
+                        let v = pop(&mut stack, i, "to_float arg")?;
+                        let f64_type = self.ctx.f64_type();
+                        let f = self
+                            .builder
+                            .build_signed_int_to_float(v, f64_type, "tof")
+                            .map_err(|e| format!("to_float sitofp at op{}: {}", i, e))?;
+                        let ri = self
+                            .builder
+                            .build_bit_cast(f, i64_type, "tof_i")
+                            .map_err(|e| format!("to_float bitcast at op{}: {}", i, e))?
+                            .into_int_value();
+                        stack.push(ri);
+                        continue;
+                    }
+                    if name == "to_int" && *argc == 1 {
+                        let v_i = pop(&mut stack, i, "to_int arg")?;
+                        let f64_type = self.ctx.f64_type();
+                        let v_f = self
+                            .builder
+                            .build_bit_cast(v_i, f64_type, "toi_f")
+                            .map_err(|e| format!("to_int bitcast at op{}: {}", i, e))?
+                            .into_float_value();
+                        let ri = self
+                            .builder
+                            .build_float_to_signed_int(v_f, i64_type, "toi")
+                            .map_err(|e| format!("to_int fptosi at op{}: {}", i, e))?;
+                        stack.push(ri);
+                        continue;
+                    }
                     // Session B: only recursive self-calls. Cross-fn
                     // calls (Session D) need a callable-resolution
                     // strategy — currently routed through tree-walk's
@@ -873,6 +924,53 @@ impl<'ctx, 'a> FunctionLowerer<'ctx, 'a> {
         Ok(())
     }
 
+    /// Path A.2: float-arithmetic binop. The stack holds i64s; rhs
+    /// is popped first, then lhs, and both are reinterpreted as f64
+    /// via bitcast. The result of `f` is bitcast back to i64 and
+    /// pushed. Caller is responsible for ensuring the operands hold
+    /// float bit-patterns (the bytecode compiler enforces this via
+    /// typed AddFloat/SubFloat/MulFloat; the JIT trusts the op).
+    fn bin_float<F>(
+        &self,
+        stack: &mut Vec<inkwell::values::IntValue<'ctx>>,
+        op_idx: usize,
+        f: F,
+    ) -> Result<(), CodegenError>
+    where
+        F: FnOnce(
+            &Builder<'ctx>,
+            inkwell::values::FloatValue<'ctx>,
+            inkwell::values::FloatValue<'ctx>,
+        ) -> Result<
+            inkwell::values::FloatValue<'ctx>,
+            inkwell::builder::BuilderError,
+        >,
+    {
+        let f64_type = self.ctx.f64_type();
+        let i64_type = self.ctx.i64_type();
+        let rhs_i = pop(stack, op_idx, "fbin rhs")?; // pushed last, so popped first
+        let lhs_i = pop(stack, op_idx, "fbin lhs")?;
+        let rhs_f = self
+            .builder
+            .build_bit_cast(rhs_i, f64_type, "fbin_rf")
+            .map_err(|e| format!("fbin rhs cast at op{}: {}", op_idx, e))?
+            .into_float_value();
+        let lhs_f = self
+            .builder
+            .build_bit_cast(lhs_i, f64_type, "fbin_lf")
+            .map_err(|e| format!("fbin lhs cast at op{}: {}", op_idx, e))?
+            .into_float_value();
+        let r_f = f(&self.builder, lhs_f, rhs_f) // closure receives (lhs, rhs) in source order
+            .map_err(|e| format!("fbinop at op{}: {}", op_idx, e))?;
+        let r_i = self
+            .builder
+            .build_bit_cast(r_f, i64_type, "fbin_ri")
+            .map_err(|e| format!("fbin ret cast at op{}: {}", op_idx, e))?
+            .into_int_value();
+        stack.push(r_i);
+        Ok(())
+    }
+
     fn cmp_op(
         &self,
         stack: &mut Vec<IntValue<'ctx>>,
diff --git a/omnimcode-codegen/tests/jit_floats.rs b/omnimcode-codegen/tests/jit_floats.rs
@@ -0,0 +1,119 @@
+//! Path A.2 — f64 support in scalar JIT lowerer.
+//!
+//! Floats are represented on the i64-shaped operand stack as bitcast
+//! IEEE-754 bit patterns. Float-typed ops (AddFloat / SubFloat /
+//! MulFloat) and the to_int / to_float intrinsics handle the bitcast
+//! at their boundary. The bytecode compiler emits the typed float ops
+//! when it has statically-typed-float operands; the JIT trusts the
+//! type discipline.
+//!
+//! Caller-facing fn signature stays scalar i64 in / i64 out. Float
+//! locals and intermediates are fine; the body must convert to int
+//! at the return boundary (or via `to_int`).
+
+#![cfg(feature = "llvm-jit")]
+
+use inkwell::context::Context;
+use omnimcode_codegen::JitContext;
+use omnimcode_core::parser::Parser;
+
+fn jit(source: &str, fn_name: &str) -> (Context, omnimcode_codegen::JittedFn) {
+    let mut parser = Parser::new(source);
+    let statements = parser.parse().expect("parse");
+    let module = omnimcode_core::compiler::compile_program(&statements).expect("compile");
+    let ctx = Context::create();
+    let jit = JitContext::new(&ctx).expect("jit");
+    let jitted = jit.jit_module(&module).expect("jit_module");
+    let f = *jitted.get(fn_name).expect("fn JIT'd"); // handle is copied out before the drops below
+    drop(jitted);
+    drop(jit); // NOTE(review): `f` outlives the JitContext — confirm it is still safe to call
+    (ctx, f) // NOTE(review): no test in this file calls this helper; each inlines the setup instead
+}
+
+#[test]
+fn float_round_trip_to_int_and_back() {
+    // to_int(to_float(x)) should round-trip an integer through the
+    // float bit-pattern path (exact only while |x| <= 2^53 in f64).
+    let source = r#"
+        fn round_trip(x) {
+            return to_int(to_float(x));
+        }
+    "#;
+    // Need to keep the JitContext alive while calling — use a longer-
+    // lived setup than `jit()` here since `jit` drops the JitContext
+    // at fn end. Inline the equivalent here.
+    let mut parser = Parser::new(source);
+    let statements = parser.parse().expect("parse");
+    let module = omnimcode_core::compiler::compile_program(&statements).expect("compile");
+    let ctx = Context::create();
+    let jit = JitContext::new(&ctx).expect("jit");
+    let jitted = jit.jit_module(&module).expect("jit_module");
+    let f = jitted.get("round_trip").expect("round_trip JIT'd");
+    for x in &[0i64, 1, 42, -7, 1_000_000, -1_000_000] {
+        assert_eq!(f.call(&[*x]).expect("call"), *x);
+    }
+}
+
+#[test]
+fn float_arithmetic_via_to_float() {
+    // area(r) = to_int(rf * rf) where rf = to_float(r), matching the
+    // source below. For r=10: rf*rf = 100.0 → to_int → 100
+    let source = r#"
+        fn area(r) {
+            h rf = to_float(r);
+            return to_int(rf * rf);
+        }
+    "#;
+    let mut parser = Parser::new(source);
+    let statements = parser.parse().expect("parse");
+    let module = omnimcode_core::compiler::compile_program(&statements).expect("compile");
+    let ctx = Context::create();
+    let jit = JitContext::new(&ctx).expect("jit");
+    let jitted = jit.jit_module(&module).expect("jit_module");
+    let f = jitted.get("area").expect("area JIT'd");
+    assert_eq!(f.call(&[10]).expect("call"), 100);
+    assert_eq!(f.call(&[3]).expect("call"), 9);
+    assert_eq!(f.call(&[0]).expect("call"), 0);
+    assert_eq!(f.call(&[100]).expect("call"), 10_000);
+}
+
+#[test]
+fn float_loop_accumulator() {
+    // Float Add/Mul in a loop (Sub is not exercised). Computes
+    //   sum_squares(n) = 1² + 2² + … + n²    (in float space)
+    // returned as int. Closed form: n(n+1)(2n+1)/6.
+    //
+    // Note: no Div in this test because the OMC compiler doesn't yet
+    // emit a DivFloat op (plain Op::Div is always emitted, which the
+    // JIT treats as signed integer division). Float division is on
+    // the deferred list with array support and AVX-512 widening.
+    let source = r#"
+        fn sum_squares(n) {
+            h sum = 0.0;
+            h k = 1;
+            while k <= n {
+                h kf = to_float(k);
+                sum = sum + kf * kf;
+                k = k + 1;
+            }
+            return to_int(sum);
+        }
+    "#;
+    let mut parser = Parser::new(source);
+    let statements = parser.parse().expect("parse");
+    let module = omnimcode_core::compiler::compile_program(&statements).expect("compile");
+    let ctx = Context::create();
+    let jit = JitContext::new(&ctx).expect("jit");
+    let jitted = jit.jit_module(&module).expect("jit_module");
+    let f = jitted.get("sum_squares").expect("sum_squares JIT'd");
+    // 1² = 1
+    assert_eq!(f.call(&[1]).expect("call"), 1);
+    // 1² + 2² = 5
+    assert_eq!(f.call(&[2]).expect("call"), 5);
+    // 1² + 2² + 3² = 14
+    assert_eq!(f.call(&[3]).expect("call"), 14);
+    // 1² + … + 10² = 385
+    assert_eq!(f.call(&[10]).expect("call"), 385);
+    // 1² + … + 100² = 338350
+    assert_eq!(f.call(&[100]).expect("call"), 338_350);
+}