Skip to content

Commit 0d1b6e0

Browse files
Feat: Implement 'BigInt' type, including fixes for misaligned types and tests.
1 parent 69de343 commit 0d1b6e0

File tree

11 files changed

+497
-120
lines changed

11 files changed

+497
-120
lines changed

compiler/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
## Edge Python
22

3-
Single-pass SSA compiler for Python on the edge computing: hand-written lexer, token-to-bytecode parser, adaptive virtual machine with NaN-boxed values, inline caching, template memoization, mark-sweep garbage collector, and configurable sandbox limits. Native and WASM targets.
3+
Single-pass SSA compiler for Python 3.13: hand-written lexer, token-to-bytecode parser, adaptive virtual machine with NaN-boxed values, inline caching, template memoization, mark-sweep garbage collector, and configurable sandbox limits. Native and WASM targets.
44

55
---
66

@@ -74,7 +74,7 @@ print(counter)
7474

7575
### Value Representation
7676

77-
NaN-boxed 64-bit: integers are 48-bit signed ($\pm 2^{47}$), overflow promotes to float (Gudeman, 1993). Results exceeding 48-bit range lose integer precision, consistent with Lua 5.3 and PHP 8. Heap index is 28-bit ($2^{28}$ objects max, returns `MemoryError` beyond).
77+
NaN-boxed 64-bit: integers are 48-bit signed ($\pm 2^{47}$) for inline storage; values outside this range are heap-allocated as arbitrary-precision `BigInt` (base-$2^{32}$ limb array, sign-magnitude), matching Python's unbounded `int` semantics. True division (`/`) always yields `float`. Heap index is 28-bit ($2^{28}$ objects max, returns `MemoryError` beyond).
7878

7979
### Building for WebAssembly
8080

@@ -147,4 +147,4 @@ cargo test --features wasm-tests
147147

148148
### License
149149

150-
MIT OR Apache-2.0
150+
MIT OR Apache-2.0

compiler/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ fn run(path: &str, _q: bool, sandbox: bool) -> Result<(), String> {
5555
}
5656

5757
fn main() {
58-
let (p, v, q, sandbox) = parse_args();
58+
let (p, _v, q, sandbox) = parse_args();
5959

6060
let default_level = if q { "error" } else { "info" };
6161
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or(default_level)).init();

compiler/src/modules/parser/expr.rs

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -243,18 +243,57 @@ impl<'src, I: Iterator<Item = Token>> Parser<'src, I> {
243243
let s = raw.replace('_', "");
244244
if kind == TokenType::Float {
245245
self.emit_const(Value::Float(s.parse().unwrap_or(0.0)));
246-
} else {
247-
let v = if let Some(s) = s.strip_prefix("0x").or(s.strip_prefix("0X")) {
248-
i64::from_str_radix(s, 16).unwrap_or(0)
249-
} else if let Some(s) = s.strip_prefix("0o").or(s.strip_prefix("0O")) {
250-
i64::from_str_radix(s, 8).unwrap_or(0)
251-
} else if let Some(s) = s.strip_prefix("0b").or(s.strip_prefix("0B")) {
252-
i64::from_str_radix(s, 2).unwrap_or(0)
246+
return;
247+
}
248+
let maybe: Option<i64> =
249+
if let Some(h) = s.strip_prefix("0x").or(s.strip_prefix("0X")) {
250+
i64::from_str_radix(h, 16).ok()
251+
} else if let Some(o) = s.strip_prefix("0o").or(s.strip_prefix("0O")) {
252+
i64::from_str_radix(o, 8).ok()
253+
} else if let Some(b) = s.strip_prefix("0b").or(s.strip_prefix("0B")) {
254+
i64::from_str_radix(b, 2).ok()
253255
} else {
254-
s.parse().unwrap_or(0)
256+
s.parse().ok()
255257
};
256-
self.emit_const(Value::Int(v));
258+
259+
match maybe {
260+
Some(v) => self.emit_const(Value::Int(v)),
261+
None => {
262+
let dec =
263+
if let Some(h) = s.strip_prefix("0x").or(s.strip_prefix("0X")) {
264+
Self::big_base_to_dec(h, 16)
265+
} else if let Some(o) = s.strip_prefix("0o").or(s.strip_prefix("0O")) {
266+
Self::big_base_to_dec(o, 8)
267+
} else if let Some(b) = s.strip_prefix("0b").or(s.strip_prefix("0B")) {
268+
Self::big_base_to_dec(b, 2)
269+
} else {
270+
s
271+
};
272+
self.emit_const(Value::BigInt(dec));
273+
}
274+
}
275+
}
276+
277+
fn big_base_to_dec(s: &str, base: u32) -> String {
278+
const DEC: u64 = 1_000_000_000;
279+
let mut limbs: Vec<u32> = vec![0];
280+
for c in s.chars() {
281+
let d = c.to_digit(base).unwrap_or(0) as u64;
282+
let mut carry = d;
283+
for limb in limbs.iter_mut() {
284+
let cur = *limb as u64 * base as u64 + carry;
285+
*limb = (cur % DEC) as u32;
286+
carry = cur / DEC;
287+
}
288+
if carry != 0 { limbs.push(carry as u32); }
289+
}
290+
let mut out = String::new();
291+
for (i, &l) in limbs.iter().rev().enumerate() {
292+
if i == 0 { out.push_str(&format!("{}", l)); }
293+
else { out.push_str(&format!("{:09}", l)); }
257294
}
295+
if out.is_empty() { out.push('0'); }
296+
out
258297
}
259298

260299
/*

compiler/src/modules/parser/types.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ Value
6767
pub enum Value {
6868
Str(String),
6969
Int(i64),
70+
BigInt(String),
7071
Float(f64),
7172
Bool(bool),
7273
None,

compiler/src/modules/vm/builtins.rs

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,21 @@ impl<'a> VM<'a> {
4848
let o = self.pop()?;
4949
if o.is_int() {
5050
let r = (o.as_int() as i128).abs();
51-
self.push(if r <= Val::INT_MAX as i128 { Val::int(r as i64) } else { Val::float(r as f64) });
51+
let v = self.i128_to_val(r)?;
52+
self.push(v);
53+
} else if o.is_float() {
54+
self.push(Val::float(o.as_float().abs()));
55+
} else if o.is_heap() {
56+
if let HeapObj::BigInt(b) = self.heap.get(o) {
57+
let ab = b.abs();
58+
let v = self.bigint_to_val(ab)?;
59+
self.push(v);
60+
} else {
61+
return Err(VmErr::Type("abs()".into()));
62+
}
63+
} else {
64+
return Err(VmErr::Type("abs()".into()));
5265
}
53-
else if o.is_float() { self.push(Val::float(o.as_float().abs())); }
54-
else { return Err(VmErr::Type("abs()".into())); }
5566
Ok(())
5667
}
5768

@@ -72,6 +83,16 @@ impl<'a> VM<'a> {
7283

7384
pub fn call_int(&mut self) -> Result<(), VmErr> {
7485
let o = self.pop()?;
86+
if o.is_heap() {
87+
if let HeapObj::BigInt(b) = self.heap.get(o) {
88+
let pushed = {
89+
let b = b.clone();
90+
self.bigint_to_val(b)?
91+
};
92+
self.push(pushed);
93+
return Ok(());
94+
}
95+
}
7596
let i = if o.is_int() { o.as_int() }
7697
else if o.is_float() { o.as_float() as i64 }
7798
else if o.is_bool() { o.as_bool() as i64 }
@@ -80,7 +101,8 @@ impl<'a> VM<'a> {
80101
_ => return Err(VmErr::Type("int()".into())),
81102
}}
82103
else { return Err(VmErr::Type("int()".into())); };
83-
self.push(Val::int(i)); Ok(())
104+
let v = self.bigint_to_val(BigInt::from_i64(i))?;
105+
self.push(v); Ok(())
84106
}
85107

86108
/*
@@ -94,7 +116,8 @@ impl<'a> VM<'a> {
94116
else if o.is_int() { o.as_int() as f64 }
95117
else if o.is_heap() { match self.heap.get(o) {
96118
HeapObj::Str(s) => s.trim().parse().map_err(|_| VmErr::Value(format!("float: '{}'", s)))?,
97-
_ => return Err(VmErr::Type("float()".into())),
119+
HeapObj::BigInt(b) => b.to_f64(),
120+
_ => return Err(VmErr::Type("float()".into()))
98121
}}
99122
else { return Err(VmErr::Type("float()".into())); };
100123
self.push(Val::float(f)); Ok(())
@@ -105,8 +128,12 @@ impl<'a> VM<'a> {
105128
}
106129

107130
pub fn call_type(&mut self) -> Result<(), VmErr> {
108-
let o = self.pop()?; let s = self.type_name(o);
109-
let v = self.heap.alloc(HeapObj::Str(s.into()))?; self.push(v); Ok(())
131+
let o = self.pop()?;
132+
let s = self.type_name(o);
133+
let full = format!("<class '{}'>", s);
134+
let v = self.heap.alloc(HeapObj::Str(full))?;
135+
self.push(v);
136+
Ok(())
110137
}
111138

112139
pub fn call_chr(&mut self) -> Result<(), VmErr> {
@@ -164,6 +191,7 @@ impl<'a> VM<'a> {
164191
}
165192
(Some(o), None) if o.is_float() => Val::int(fround(o.as_float()) as i64),
166193
(Some(o), _) if o.is_int() => *o,
194+
(Some(o), _) if o.is_heap() && matches!(self.heap.get(*o), HeapObj::BigInt(_)) => *o,
167195
_ => return Err(VmErr::Type("round()".into())),
168196
};
169197
self.push(v); Ok(())

compiler/src/modules/vm/cache.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ fn eq_vals_heap(a: Val, b: Val, heap: &super::types::HeapPool) -> bool {
7171
if !a.is_heap() || !b.is_heap() { return a.0 == b.0; }
7272
// Content-based Val comparison for cache lookups, recursing into collections.
7373
match (heap.get(a), heap.get(b)) {
74+
(HeapObj::BigInt(x), HeapObj::BigInt(y)) => x.cmp(y) == core::cmp::Ordering::Equal,
7475
(HeapObj::Str(x), HeapObj::Str(y)) => x == y,
7576
(HeapObj::Tuple(x), HeapObj::Tuple(y)) => eq_seq(x, y, |a,b| eq_vals_heap(a,b,heap)),
7677
(HeapObj::List(x), HeapObj::List(y)) => eq_seq(&x.borrow(), &y.borrow(), |a,b| eq_vals_heap(a,b,heap)),

compiler/src/modules/vm/mod.rs

Lines changed: 50 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,14 @@ impl<'a> VM<'a> {
128128

129129
pub(crate) fn to_val(&mut self, v: &Value) -> Result<Val, VmErr> {
130130
Ok(match v {
131-
Value::Int(i) => Val::int_checked(*i).unwrap_or_else(|| Val::float(*i as f64)),
131+
Value::Int(i) => {
132+
if *i >= Val::INT_MIN && *i <= Val::INT_MAX {
133+
Val::int(*i)
134+
} else {
135+
self.heap.alloc(HeapObj::BigInt(BigInt::from_i64(*i)))?
136+
}
137+
}
138+
Value::BigInt(s) => self.heap.alloc(HeapObj::BigInt(BigInt::from_decimal(s)))?,
132139
Value::Float(f) => Val::float(*f),
133140
Value::Bool(b) => Val::bool(*b),
134141
Value::None => Val::none(),
@@ -252,57 +259,64 @@ impl<'a> VM<'a> {
252259
OpCode::Mul => { let (a, b) = self.pop2()?; cached_binop!(self.heap, rip, &ins.opcode, a, b, cache, adaptive); let v = self.mul_vals(a, b)?; self.push(v); }
253260
OpCode::Div => { let (a, b) = self.pop2()?; let v = self.div_vals(a, b)?; self.push(v); }
254261
OpCode::Mod => {
255-
let (a, b) = self.pop2()?;
256-
if !a.is_int() || !b.is_int() { return Err(VmErr::Type("mod requires int".into())); }
257-
let d = b.as_int(); if d == 0 { return Err(VmErr::ZeroDiv); }
258-
let r = a.as_int() % d;
259-
self.push(Val::int(if r != 0 && (r < 0) != (d < 0) { r + d } else { r }));
262+
let (a, b) = self.pop2()?;
263+
if let (Some(ba), Some(bb)) = (self.to_bigint(a), self.to_bigint(b)) {
264+
let (_, r) = ba.divmod(&bb).ok_or(VmErr::ZeroDiv)?;
265+
let v = self.bigint_to_val(r)?;
266+
self.push(v);
267+
} else {
268+
return Err(VmErr::Type("mod requires int".into()));
269+
}
260270
}
261271
OpCode::Pow => {
262272
let (a, b) = self.pop2()?;
263-
let v = match (a.is_int(), b.is_int()) {
264-
(true, true) => {
273+
if let Some(ba) = self.to_bigint(a) {
274+
if b.is_int() {
265275
let exp = b.as_int();
266276
if exp >= 0 {
267-
match (a.as_int() as i128).checked_pow(exp as u32) {
268-
Some(result) if result >= Val::INT_MIN as i128
269-
&& result <= Val::INT_MAX as i128 => Val::int(result as i64),
270-
Some(result) => Val::float(result as f64),
271-
None => Val::float(fpowi(a.as_int() as f64, exp as i32)),
272-
}
273-
} else {
274-
Val::float(fpowi(a.as_int() as f64, exp as i32))
277+
let result = ba.pow_u32(exp as u32);
278+
let v = self.bigint_to_val(result)?;
279+
self.push(v);
280+
continue;
275281
}
282+
self.push(Val::float(fpowi(ba.to_f64(), exp as i32)));
283+
continue;
276284
}
277-
_ => {
278-
let fa = if a.is_int() { a.as_int() as f64 } else if a.is_float() { a.as_float() }
279-
else { return Err(VmErr::Type("'**' requires numeric operands".into())); };
280-
let fb = if b.is_int() { b.as_int() as f64 } else if b.is_float() { b.as_float() }
281-
else { return Err(VmErr::Type("'**' requires numeric operands".into())); };
282-
Val::float(fpowf(fa, fb))
283-
}
284-
};
285-
self.push(v);
285+
}
286+
let fa = if a.is_int() { a.as_int() as f64 } else if a.is_float() { a.as_float() }
287+
else { return Err(VmErr::Type("'**' requires numeric operands".into())); };
288+
let fb = if b.is_int() { b.as_int() as f64 } else if b.is_float() { b.as_float() }
289+
else { return Err(VmErr::Type("'**' requires numeric operands".into())); };
290+
self.push(Val::float(fpowf(fa, fb)));
286291
}
287292
OpCode::FloorDiv => {
288293
let (a, b) = self.pop2()?;
289-
if !a.is_int() || !b.is_int() { return Err(VmErr::Type("// requires int".into())); }
290-
let d = b.as_int(); if d == 0 { return Err(VmErr::ZeroDiv); }
291-
let (q, r) = (a.as_int() / d, a.as_int() % d);
292-
self.push(Val::int(if r != 0 && (r < 0) != (d < 0) { q - 1 } else { q }));
294+
if let (Some(ba), Some(bb)) = (self.to_bigint(a), self.to_bigint(b)) {
295+
let (q, _) = ba.divmod(&bb).ok_or(VmErr::ZeroDiv)?;
296+
let v = self.bigint_to_val(q)?;
297+
self.push(v);
298+
} else {
299+
return Err(VmErr::Type("// requires int".into()));
300+
}
293301
}
294302
OpCode::Minus => {
295303
let v = self.pop()?;
296304
if v.is_int() {
297-
let r = -(v.as_int() as i128);
298-
self.push(if r >= Val::INT_MIN as i128 && r <= Val::INT_MAX as i128 {
299-
Val::int(r as i64)
305+
let pushed = self.i128_to_val(-(v.as_int() as i128))?;
306+
self.push(pushed);
307+
} else if v.is_float() {
308+
self.push(Val::float(-v.as_float()));
309+
} else if v.is_heap() {
310+
if let HeapObj::BigInt(b) = self.heap.get(v) {
311+
let neg = b.neg();
312+
let pushed = self.bigint_to_val(neg)?;
313+
self.push(pushed);
300314
} else {
301-
Val::float(r as f64)
302-
});
315+
return Err(VmErr::Type("unary -".into()));
316+
}
317+
} else {
318+
return Err(VmErr::Type("unary -".into()));
303319
}
304-
else if v.is_float() { self.push(Val::float(-v.as_float())); }
305-
else { return Err(VmErr::Type("unary -".into())); }
306320
}
307321

308322
// Bitwise

0 commit comments

Comments
 (0)